We truncated the diff of some files because they were too big.
If you want to see the full diff for every file, click here.
Changes of Revision 11
x265.changes
Changed
x
1
2
-------------------------------------------------------------------
3
+Fri Nov 27 18:21:04 UTC 2015 - aloisio@gmx.com
4
+
5
+- Update to version 1.8:
6
+ API Changes:
7
+ * Experimental support for Main12 is now enabled. Partial
8
+ assembly support exists.
9
+ * Main12 and Intra/Still picture profiles are now supported.
10
+ Still picture profile is detected based on
11
+ x265_param::totalFrames.
12
+ * Three classes of encoding statistics are now available
13
+ through the API.
14
+ + x265_stats - contains encoding statistics, available
15
+ through x265_encoder_get_stats()
16
+ + x265_frame_stats and x265_cu_stats - contains frame
17
+ encoding statistics, available through recon x265_picture
18
+ * --csv
19
+ * x265_encoder_log() is now deprecated
20
+ * x265_param::csvfn is also deprecated
21
+ * --log-level now controls only console logging, frame
22
+ level console logging has been removed.
23
+ * Support added for new color transfer characteristic ARIB
24
+ STD-B67
25
+ New Features:
26
+ * limit-refs
27
+ + This feature limits the references analysed for
28
+ individual CUs.
29
+ + Provides a nice tradeoff between efficiency and
30
+ performance.
31
+ + aq-mode 3
32
+ * A new aq-mode that provides additional biasing for
33
+ low-light conditions.
34
+ * An improved scene cut detection logic that allows
35
+ ratecontrol to manage visual quality at fade-ins and
36
+ fade-outs better.
37
+ Preset and Tune Options:
38
+ * tune grain
39
+ + Increases psyRdoq strength to 10.0, and rdoq-level to 2.
40
+ + qg-size
41
+ * Default value changed to 32.
42
+- soname bump to 68
43
+- Reworked arm.patch for 1.8
44
+
45
+-------------------------------------------------------------------
46
Fri May 29 09:11:02 UTC 2015 - aloisio@gmx.com
47
48
- soname bump to 59
49
x265.spec
Changed
26
1
2
# based on the spec file from https://build.opensuse.org/package/view_file/home:Simmphonie/libx265/
3
4
Name: x265
5
-%define soname 59
6
+%define soname 68
7
%define libname lib%{name}
8
%define libsoname %{libname}-%{soname}
9
-Version: 1.7
10
+Version: 1.8
11
Release: 0
12
License: GPL-2.0+
13
Summary: A free h265/HEVC encoder - encoder binary
14
15
streams.
16
17
%prep
18
-%setup -q -n "%{name}_%{version}/build/linux"
19
+%setup -q -n "%{name}_11047/build/linux"
20
cd ../..
21
-%patch0
22
+%patch0 -p1
23
cd -
24
%define FAKE_BUILDDATE %(LC_ALL=C date -u -r %{_sourcedir}/%{name}.changes '+%%b %%e %%Y')
25
sed -i -e "s/0.0/%{soname}.0/g" ../../source/cmake/version.cmake
26
arm.patch
Changed
69
1
2
---- source/CMakeLists.txt.orig 2015-04-28 21:43:18.585528552 +0200
3
-+++ source/CMakeLists.txt 2015-04-28 21:47:14.995334232 +0200
4
-@@ -50,10 +50,18 @@
5
- set(X64 1)
6
- add_definitions(-DX86_64=1)
7
- endif()
8
+Index: x265_11047/source/CMakeLists.txt
9
+===================================================================
10
+--- x265_11047.orig/source/CMakeLists.txt
11
++++ x265_11047/source/CMakeLists.txt
12
+@@ -56,10 +56,22 @@ elseif(POWERMATCH GREATER "-1")
13
+ message(STATUS "Detected POWER target processor")
14
+ set(POWER 1)
15
+ add_definitions(-DX265_ARCH_POWER=1)
16
+elseif(${SYSPROC} MATCHES "armv5.*")
17
+ message(STATUS "Detected ARMV5 system processor")
18
+ set(ARMV5 1)
19
20
+ message(STATUS "Detected ARMV7 system processor")
21
+ set(ARMV7 1)
22
+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
23
++elseif(${SYSPROC} STREQUAL "aarch64")
24
++ message(STATUS "Detected AArch64 system processor")
25
++ set(ARMV7 1)
26
++ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
27
else()
28
message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
29
message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
30
-@@ -155,8 +163,8 @@
31
+@@ -169,8 +181,8 @@ if(GCC)
32
elseif(X86 AND NOT X64)
33
add_definitions(-march=i686)
34
endif()
35
36
endif()
37
if(FPROFILE_GENERATE)
38
if(INTEL_CXX)
39
---- source/common/cpu.cpp.orig 2015-04-28 21:47:44.634923269 +0200
40
-+++ source/common/cpu.cpp 2015-04-28 21:49:50.305468867 +0200
41
+Index: x265_11047/source/common/cpu.cpp
42
+===================================================================
43
+--- x265_11047.orig/source/common/cpu.cpp
44
++++ x265_11047/source/common/cpu.cpp
45
@@ -37,7 +37,7 @@
46
#include <machine/cpu.h>
47
#endif
48
49
#include <signal.h>
50
#include <setjmp.h>
51
static sigjmp_buf jmpbuf;
52
-@@ -340,7 +340,6 @@
53
- }
54
-
55
- canjump = 1;
56
-- x265_cpu_neon_test();
57
- canjump = 0;
58
- signal(SIGILL, oldsig);
59
- #endif // if !HAVE_NEON
60
-@@ -356,7 +355,7 @@
61
- // which may result in incorrect detection and the counters stuck enabled.
62
- // right now Apple does not seem to support performance counters for this test
63
- #ifndef __MACH__
64
-- flags |= x265_cpu_fast_neon_mrc_test() ? X265_CPU_FAST_NEON_MRC : 0;
65
-+ //flags |= x265_cpu_fast_neon_mrc_test() ? X265_CPU_FAST_NEON_MRC : 0;
66
- #endif
67
- // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
68
- #endif // if HAVE_ARMV6
69
baselibs.conf
Changed
4
1
2
-libx265-59
3
+libx265-68
4
x265_1.7.tar.gz/source/filters/filters.cpp
Deleted
81
1
2
-/*****************************************************************************
3
- * Copyright (C) 2013 x265 project
4
- *
5
- * Authors: Selvakumar Nithiyaruban <selvakumar@multicorewareinc.com>
6
- *
7
- * This program is free software; you can redistribute it and/or modify
8
- * it under the terms of the GNU General Public License as published by
9
- * the Free Software Foundation; either version 2 of the License, or
10
- * (at your option) any later version.
11
- *
12
- * This program is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- * GNU General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU General Public License
18
- * along with this program; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
- *
21
- * This program is also available under a commercial proprietary license.
22
- * For more information, contact us at license @ x265.com.
23
- *****************************************************************************/
24
-
25
-#include "filters.h"
26
-#include "common.h"
27
-
28
-/* The dithering algorithm is based on Sierra-2-4A error diffusion. */
29
-void ditherPlane(pixel *dst, int dstStride, uint16_t *src, int srcStride,
30
- int width, int height, int16_t *errors, int bitDepth)
31
-{
32
- const int lShift = 16 - bitDepth;
33
- const int rShift = 16 - bitDepth + 2;
34
- const int half = (1 << (16 - bitDepth + 1));
35
- const int pixelMax = (1 << bitDepth) - 1;
36
-
37
- memset(errors, 0, (width + 1) * sizeof(int16_t));
38
- int pitch = 1;
39
- for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
40
- {
41
- int16_t err = 0;
42
- for (int x = 0; x < width; x++)
43
- {
44
- err = err * 2 + errors[x] + errors[x + 1];
45
- dst[x * pitch] = (pixel)x265_clip3(0, pixelMax, ((src[x * 1] << 2) + err + half) >> rShift);
46
- errors[x] = err = src[x * pitch] - (dst[x * pitch] << lShift);
47
- }
48
- }
49
-}
50
-
51
-void ditherImage(x265_picture& picIn, int picWidth, int picHeight, int16_t *errorBuf, int bitDepth)
52
-{
53
- /* This portion of code is from readFrame in x264. */
54
- for (int i = 0; i < x265_cli_csps[picIn.colorSpace].planes; i++)
55
- {
56
- if ((picIn.bitDepth & 7) && (picIn.bitDepth != 16))
57
- {
58
- /* upconvert non 16bit high depth planes to 16bit */
59
- uint16_t *plane = (uint16_t*)picIn.planes[i];
60
- uint32_t pixelCount = x265_picturePlaneSize(picIn.colorSpace, picWidth, picHeight, i);
61
- int lShift = 16 - picIn.bitDepth;
62
-
63
- /* This loop assumes width is equal to stride which
64
- happens to be true for file reader outputs */
65
- for (uint32_t j = 0; j < pixelCount; j++)
66
- {
67
- plane[j] = plane[j] << lShift;
68
- }
69
- }
70
- }
71
-
72
- for (int i = 0; i < x265_cli_csps[picIn.colorSpace].planes; i++)
73
- {
74
- int height = (int)(picHeight >> x265_cli_csps[picIn.colorSpace].height[i]);
75
- int width = (int)(picWidth >> x265_cli_csps[picIn.colorSpace].width[i]);
76
-
77
- ditherPlane(((pixel*)picIn.planes[i]), picIn.stride[i] / sizeof(pixel), ((uint16_t*)picIn.planes[i]),
78
- picIn.stride[i] / 2, width, height, errorBuf, bitDepth);
79
- }
80
-}
81
x265_1.7.tar.gz/source/filters/filters.h
Deleted
33
1
2
-/*****************************************************************************
3
- * Copyright (C) 2013 x265 project
4
- *
5
- * Authors: Selvakumar Nithiyaruban <selvakumar@multicorewareinc.com>
6
- *
7
- * This program is free software; you can redistribute it and/or modify
8
- * it under the terms of the GNU General Public License as published by
9
- * the Free Software Foundation; either version 2 of the License, or
10
- * (at your option) any later version.
11
- *
12
- * This program is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- * GNU General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU General Public License
18
- * along with this program; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
- *
21
- * This program is also available under a commercial proprietary license.
22
- * For more information, contact us at license @ x265.com.
23
- *****************************************************************************/
24
-
25
-#ifndef X265_FILTERS_H
26
-#define X265_FILTERS_H
27
-
28
-#include "x265.h"
29
-
30
-void ditherImage(x265_picture&, int picWidth, int picHeight, int16_t *errorBuf, int bitDepth);
31
-
32
-#endif //X265_FILTERS_H
33
x265_1.7.tar.gz/.hg_archival.txt -> x265_1.8.tar.gz/.hg_archival.txt
Changed
9
1
2
repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf
3
-node: 8425278def1edf0931dc33fc518e1950063e76b0
4
+node: 5dcc9d3a928c400b41a3547d7bfee10340519e56
5
branch: stable
6
-tag: 1.7
7
+latesttag: 1.8
8
+latesttagdistance: 1
9
x265_1.7.tar.gz/.hgtags -> x265_1.8.tar.gz/.hgtags
Changed
7
1
2
5e604833c5aa605d0b6efbe5234492b5e7d8ac61 1.4
3
9f0324125f53a12f766f6ed6f98f16e2f42337f4 1.5
4
cbeb7d8a4880e4020c4545dd8e498432c3c6cad3 1.6
5
+8425278def1edf0931dc33fc518e1950063e76b0 1.7
6
+e27327f5da35c5feb660360336fdc94bd0afe719 1.8
7
x265_1.8.tar.gz/build/linux/multilib.sh
Added
43
1
2
+#!/bin/sh
3
+
4
+mkdir -p 8bit 10bit 12bit
5
+
6
+cd 12bit
7
+cmake ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
8
+make ${MAKEFLAGS}
9
+
10
+cd ../10bit
11
+cmake ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
12
+make ${MAKEFLAGS}
13
+
14
+cd ../8bit
15
+ln -sf ../10bit/libx265.a libx265_main10.a
16
+ln -sf ../12bit/libx265.a libx265_main12.a
17
+cmake ../../../source -DEXTRA_LIB="x265_main10.a;x265_main12.a" -DEXTRA_LINK_FLAGS=-L. -DLINKED_10BIT=ON -DLINKED_12BIT=ON
18
+make ${MAKEFLAGS}
19
+
20
+# rename the 8bit library, then combine all three into libx265.a
21
+mv libx265.a libx265_main.a
22
+
23
+uname=`uname`
24
+if [ "$uname" = "Linux" ]
25
+then
26
+
27
+# On Linux, we use GNU ar to combine the static libraries together
28
+ar -M <<EOF
29
+CREATE libx265.a
30
+ADDLIB libx265_main.a
31
+ADDLIB libx265_main10.a
32
+ADDLIB libx265_main12.a
33
+SAVE
34
+END
35
+EOF
36
+
37
+else
38
+
39
+# Mac/BSD libtool
40
+libtool -static -o libx265.a libx265_main.a libx265_main10.a libx265_main12.a 2>/dev/null
41
+
42
+fi
43
x265_1.8.tar.gz/build/msys/multilib.sh
Added
31
1
2
+#!/bin/sh
3
+
4
+mkdir -p 8bit 10bit 12bit
5
+
6
+cd 12bit
7
+cmake -G "MSYS Makefiles" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
8
+make ${MAKEFLAGS}
9
+cp libx265.a ../8bit/libx265_main12.a
10
+
11
+cd ../10bit
12
+cmake -G "MSYS Makefiles" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
13
+make ${MAKEFLAGS}
14
+cp libx265.a ../8bit/libx265_main10.a
15
+
16
+cd ../8bit
17
+cmake -G "MSYS Makefiles" ../../../source -DEXTRA_LIB="x265_main10.a;x265_main12.a" -DEXTRA_LINK_FLAGS=-L. -DLINKED_10BIT=ON -DLINKED_12BIT=ON
18
+make ${MAKEFLAGS}
19
+
20
+# rename the 8bit library, then combine all three into libx265.a using GNU ar
21
+mv libx265.a libx265_main.a
22
+
23
+ar -M <<EOF
24
+CREATE libx265.a
25
+ADDLIB libx265_main.a
26
+ADDLIB libx265_main10.a
27
+ADDLIB libx265_main12.a
28
+SAVE
29
+END
30
+EOF
31
x265_1.8.tar.gz/build/vc10-x86_64/multilib.bat
Added
46
1
2
+@echo off
3
+if "%VS100COMNTOOLS%" == "" (
4
+ msg "%username%" "Visual Studio 10 not detected"
5
+ exit 1
6
+)
7
+
8
+call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat"
9
+
10
+@mkdir 12bit
11
+@mkdir 10bit
12
+@mkdir 8bit
13
+
14
+@cd 12bit
15
+cmake -G "Visual Studio 10 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
16
+if exist x265.sln (
17
+ MSBuild /property:Configuration="Release" x265.sln
18
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
19
+)
20
+
21
+@cd ..\10bit
22
+cmake -G "Visual Studio 10 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
23
+if exist x265.sln (
24
+ MSBuild /property:Configuration="Release" x265.sln
25
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
26
+)
27
+
28
+@cd ..\8bit
29
+if not exist x265-static-main10.lib (
30
+ msg "%username%" "10bit build failed"
31
+ exit 1
32
+)
33
+if not exist x265-static-main12.lib (
34
+ msg "%username%" "12bit build failed"
35
+ exit 1
36
+)
37
+cmake -G "Visual Studio 10 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
38
+if exist x265.sln (
39
+ MSBuild /property:Configuration="Release" x265.sln
40
+ :: combine static libraries (ignore warnings caused by winxp.cpp hacks)
41
+ move Release\x265-static.lib x265-static-main.lib
42
+ LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
43
+)
44
+
45
+pause
46
x265_1.8.tar.gz/build/vc11-x86_64/multilib.bat
Added
46
1
2
+@echo off
3
+if "%VS110COMNTOOLS%" == "" (
4
+ msg "%username%" "Visual Studio 11 not detected"
5
+ exit 1
6
+)
7
+
8
+call "%VS110COMNTOOLS%\..\..\VC\vcvarsall.bat"
9
+
10
+@mkdir 12bit
11
+@mkdir 10bit
12
+@mkdir 8bit
13
+
14
+@cd 12bit
15
+cmake -G "Visual Studio 11 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
16
+if exist x265.sln (
17
+ MSBuild /property:Configuration="Release" x265.sln
18
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
19
+)
20
+
21
+@cd ..\10bit
22
+cmake -G "Visual Studio 11 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
23
+if exist x265.sln (
24
+ MSBuild /property:Configuration="Release" x265.sln
25
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
26
+)
27
+
28
+@cd ..\8bit
29
+if not exist x265-static-main10.lib (
30
+ msg "%username%" "10bit build failed"
31
+ exit 1
32
+)
33
+if not exist x265-static-main12.lib (
34
+ msg "%username%" "12bit build failed"
35
+ exit 1
36
+)
37
+cmake -G "Visual Studio 11 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
38
+if exist x265.sln (
39
+ MSBuild /property:Configuration="Release" x265.sln
40
+ :: combine static libraries (ignore warnings caused by winxp.cpp hacks)
41
+ move Release\x265-static.lib x265-static-main.lib
42
+ LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
43
+)
44
+
45
+pause
46
x265_1.8.tar.gz/build/vc12-x86_64/multilib.bat
Added
46
1
2
+@echo off
3
+if "%VS120COMNTOOLS%" == "" (
4
+ msg "%username%" "Visual Studio 12 not detected"
5
+ exit 1
6
+)
7
+
8
+call "%VS120COMNTOOLS%\..\..\VC\vcvarsall.bat"
9
+
10
+@mkdir 12bit
11
+@mkdir 10bit
12
+@mkdir 8bit
13
+
14
+@cd 12bit
15
+cmake -G "Visual Studio 12 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
16
+if exist x265.sln (
17
+ MSBuild /property:Configuration="Release" x265.sln
18
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
19
+)
20
+
21
+@cd ..\10bit
22
+cmake -G "Visual Studio 12 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
23
+if exist x265.sln (
24
+ MSBuild /property:Configuration="Release" x265.sln
25
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
26
+)
27
+
28
+@cd ..\8bit
29
+if not exist x265-static-main10.lib (
30
+ msg "%username%" "10bit build failed"
31
+ exit 1
32
+)
33
+if not exist x265-static-main12.lib (
34
+ msg "%username%" "12bit build failed"
35
+ exit 1
36
+)
37
+cmake -G "Visual Studio 12 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
38
+if exist x265.sln (
39
+ MSBuild /property:Configuration="Release" x265.sln
40
+ :: combine static libraries (ignore warnings caused by winxp.cpp hacks)
41
+ move Release\x265-static.lib x265-static-main.lib
42
+ LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
43
+)
44
+
45
+pause
46
x265_1.8.tar.gz/build/vc9-x86_64/multilib.bat
Added
46
1
2
+@echo off
3
+if "%VS90COMNTOOLS%" == "" (
4
+ msg "%username%" "Visual Studio 9 not detected"
5
+ exit 1
6
+)
7
+
8
+call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat"
9
+
10
+@mkdir 12bit
11
+@mkdir 10bit
12
+@mkdir 8bit
13
+
14
+@cd 12bit
15
+cmake -G "Visual Studio 9 2008 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF -DMAIN12=ON
16
+if exist x265.sln (
17
+ MSBuild /property:Configuration="Release" x265.sln
18
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main12.lib
19
+)
20
+
21
+@cd ..\10bit
22
+cmake -G "Visual Studio 9 2008 Win64" ../../../source -DHIGH_BIT_DEPTH=ON -DEXPORT_C_API=OFF -DENABLE_SHARED=OFF -DENABLE_CLI=OFF
23
+if exist x265.sln (
24
+ MSBuild /property:Configuration="Release" x265.sln
25
+ copy/y Release\x265-static.lib ..\8bit\x265-static-main10.lib
26
+)
27
+
28
+@cd ..\8bit
29
+if not exist x265-static-main10.lib (
30
+ msg "%username%" "10bit build failed"
31
+ exit 1
32
+)
33
+if not exist x265-static-main12.lib (
34
+ msg "%username%" "12bit build failed"
35
+ exit 1
36
+)
37
+cmake -G "Visual Studio 9 2008 Win64" ../../../source -DEXTRA_LIB="x265-static-main10.lib;x265-static-main12.lib" -DLINKED_10BIT=ON -DLINKED_12BIT=ON
38
+if exist x265.sln (
39
+ MSBuild /property:Configuration="Release" x265.sln
40
+ :: combine static libraries (ignore warnings caused by winxp.cpp hacks)
41
+ move Release\x265-static.lib x265-static-main.lib
42
+ LIB.EXE /ignore:4006 /ignore:4221 /OUT:Release\x265-static.lib x265-static-main.lib x265-static-main10.lib x265-static-main12.lib
43
+)
44
+
45
+pause
46
x265_1.7.tar.gz/doc/reST/api.rst -> x265_1.8.tar.gz/doc/reST/api.rst
Changed
201
1
2
x265 will accept input pixels of any depth between 8 and 16 bits
3
regardless of the depth of its internal pixels (8 or 10). It will shift
4
and mask input pixels as required to reach the internal depth. If
5
-downshifting is being performed using our CLI application, the
6
-:option:`--dither` option may be enabled to reduce banding. This feature
7
-is not available through the C interface.
8
+downshifting is being performed using our CLI application (to 8 bits),
9
+the :option:`--dither` option may be enabled to reduce banding. This
10
+feature is not available through the C interface.
11
12
Encoder
13
=======
14
15
helps future-proof your code in many ways, but the x265 API is
16
versioned in such a way that we prevent linkage against a build of
17
x265 that does not match the version of the header you are compiling
18
- against. This is function of the X265_BUILD macro.
19
+ against (unless you use x265_api_query() to acquire the library's
20
+ interfaces). This is a function of the X265_BUILD macro.
21
22
**x265_encoder_parameters()** may be used to get a copy of the param
23
structure from the encoder after it has been opened, in order to see the
24
25
* presets is not recommended without a more fine-grained breakdown of
26
* parameters to take this into account. */
27
int x265_encoder_reconfig(x265_encoder *, x265_param *);
28
-
29
+
30
Pictures
31
========
32
33
34
provided, the encoder will fill it with data pertaining to the
35
output picture corresponding to the output NALs, including the
36
reconstructed image, POC and decode timestamp. These pictures will be
37
- in encode (or decode) order.
38
+ in encode (or decode) order. The encoder will also write corresponding
39
+ frame encode statistics into **x265_frame_stats**.
40
41
When the last of the raw input pictures has been sent to the encoder,
42
**x265_encoder_encode()** must still be called repeatedly with a
43
44
Cleanup
45
=======
46
47
-At the end of the encode, the application will want to trigger logging
48
-of the final encode statistics, if :option:`--csv` had been specified::
49
-
50
- /* x265_encoder_log:
51
- * write a line to the configured CSV file. If a CSV filename was not
52
- * configured, or file open failed, or the log level indicated frame level
53
- * logging, this function will perform no write. */
54
- void x265_encoder_log(x265_encoder *encoder, int argc, char **argv);
55
-
56
Finally, the encoder must be closed in order to free all of its
57
resources. An encoder that has been flushed cannot be restarted and
58
reused. Once **x265_encoder_close()** has been called, the encoder
59
60
Multi-library Interface
61
=======================
62
63
-If your application might want to make a runtime selection between
64
-a number of libx265 libraries (perhaps 8bpp and 16bpp), then you will
65
-want to use the multi-library interface.
66
-
67
-Instead of directly using all of the **x265_** methods documented
68
-above, you query an x265_api structure from your libx265 and then use
69
-the function pointers within that structure of the same name, but
70
-without the **x265_** prefix. So **x265_param_default()** becomes
71
-**api->param_default()**. The key method is x265_api_get()::
72
-
73
- /* x265_api_get:
74
- * Retrieve the programming interface for a linked x265 library.
75
- * May return NULL if no library is available that supports the
76
- * requested bit depth. If bitDepth is 0, the function is guarunteed
77
- * to return a non-NULL x265_api pointer from the system default
78
- * libx265 */
79
- const x265_api* x265_api_get(int bitDepth);
80
-
81
-Note that using this multi-library API in your application is only the
82
-first step.
83
-
84
-Your application must link to one build of libx265 (statically or
85
-dynamically) and this linked version of libx265 will support one
86
-bit-depth (8 or 10 bits).
87
-
88
-Your application must now request the API for the bitDepth you would
89
-prefer the encoder to use (8 or 10). If the requested bitdepth is zero,
90
-or if it matches the bitdepth of the system default libx265 (the
91
-currently linked library), then this library will be used for encode.
92
-If you request a different bit-depth, the linked libx265 will attempt
93
-to dynamically bind a shared library with a name appropriate for the
94
-requested bit-depth:
95
-
96
- 8-bit: libx265_main.dll
97
- 10-bit: libx265_main10.dll
98
-
99
- (the shared library extension is obviously platform specific. On
100
- Linux it is .so while on Mac it is .dylib)
101
-
102
-For example on Windows, one could package together an x265.exe
103
-statically linked against the 8bpp libx265 together with a
104
-libx265_main10.dll in the same folder, and this executable would be able
105
-to encode main and main10 bitstreams.
106
-
107
-On Linux, x265 packagers could install 8bpp static and shared libraries
108
-under the name libx265 (so all applications link against 8bpp libx265)
109
-and then also install libx265_main10.so (symlinked to its numbered solib).
110
-Thus applications which use x265_api_get() will be able to generate main
111
-or main10 bitstreams.
112
+If your application might want to make a runtime bit-depth selection, it
113
+will need to use one of these bit-depth introspection interfaces which
114
+returns an API structure containing the public function entry points and
115
+constants.
116
+
117
+Instead of directly using all of the **x265_** methods documented above,
118
+you query an x265_api structure from your libx265 and then use the
119
+function pointers of the same name (minus the **x265_** prefix) within
120
+that structure. For instance **x265_param_default()** becomes
121
+**api->param_default()**.
122
+
123
+x265_api_get
124
+------------
125
+
126
+The first bit-depth introspection method is x265_api_get(). It is designed
127
+for applications that might statically link with libx265, or will at
128
+least be tied to a particular SONAME or API version::
129
+
130
+ /* x265_api_get:
131
+ * Retrieve the programming interface for a linked x265 library.
132
+ * May return NULL if no library is available that supports the
133
+ * requested bit depth. If bitDepth is 0, the function is guaranteed
134
+ * to return a non-NULL x265_api pointer from the system default
135
+ * libx265 */
136
+ const x265_api* x265_api_get(int bitDepth);
137
+
138
+Like **x265_encoder_encode()**, this function has the build number
139
+automatically appended to the function name via macros. This ties your
140
+application to a particular binary API version of libx265 (the one you
141
+compile against). If you attempt to link with a libx265 with a different
142
+API version number, the link will fail.
143
+
144
+Obviously this has no meaningful effect on applications which statically
145
+link to libx265.
146
+
147
+x265_api_query
148
+--------------
149
+
150
+The second bit-depth introspection method is designed for applications
151
+which need more flexibility in API versioning. If you use
152
+**x265_api_query()** and dynamically link to libx265 at runtime (using
153
+dlopen() on POSIX or LoadLibrary() on Windows) your application is no
154
+longer directly tied to the API version that it was compiled against::
155
+
156
+ /* x265_api_query:
157
+ * Retrieve the programming interface for a linked x265 library, like
158
+ * x265_api_get(), except this function accepts X265_BUILD as the second
159
+ * argument rather than using the build number as part of the function name.
160
+ * Applications which dynamically link to libx265 can use this interface to
161
+ * query the library API and achieve a relative amount of version skew
162
+ * flexibility. The function may return NULL if the library determines that
163
+ * the apiVersion that your application was compiled against is not compatible
164
+ * with the library you have linked with.
165
+ *
166
+ * api_major_version will be incremented any time non-backward compatible
167
+ * changes are made to any public structures or functions. If
168
+ * api_major_version does not match X265_MAJOR_VERSION from the x265.h your
169
+ * application compiled against, your application must not use the returned
170
+ * x265_api pointer.
171
+ *
172
+ * Users of this API *must* also validate the sizes of any structures which
173
+ * are not treated as opaque in application code. For instance, if your
174
+ * application dereferences a x265_param pointer, then it must check that
175
+ * api->sizeof_param matches the sizeof(x265_param) that your application
176
+ * compiled with. */
177
+ const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
178
+
179
+A number of validations must be performed on the returned API structure
180
+in order to determine if it is safe for use by your application. If you
181
+do not perform these checks, your application is liable to crash::
182
+
183
+ if (api->api_major_version != X265_MAJOR_VERSION) /* do not use */
184
+ if (api->sizeof_param != sizeof(x265_param)) /* do not use */
185
+ if (api->sizeof_picture != sizeof(x265_picture)) /* do not use */
186
+ if (api->sizeof_stats != sizeof(x265_stats)) /* do not use */
187
+ if (api->sizeof_zone != sizeof(x265_zone)) /* do not use */
188
+ etc.
189
+
190
+Note that if your application does not directly allocate or dereference
191
+one of these structures, if it treats the structure as opaque or does
192
+not use it at all, then it can skip the size check for that structure.
193
+
194
+In particular, if your application uses api->param_alloc(),
195
+api->param_free(), api->param_parse(), etc and never directly accesses
196
+any x265_param fields, then it can skip the check on the
197
+sizeof(x265_param) and thereby ignore changes to that structure (which
198
+account for a large percentage of X265_BUILD bumps).
199
+
200
+Build Implications
201
x265_1.7.tar.gz/doc/reST/cli.rst -> x265_1.8.tar.gz/doc/reST/cli.rst
Changed
201
1
2
3
Generally, when an option expects a string value from a list of strings
4
the user may specify the integer ordinal of the value they desire. ie:
5
-:option:`--log-level` 4 is equivalent to :option:`--log-level` debug.
6
+:option:`--log-level` 3 is equivalent to :option:`--log-level` debug.
7
8
Executable Options
9
==================
10
11
2. unable to open encoder
12
3. unable to generate stream headers
13
4. encoder abort
14
+ 5. unable to open csv file
15
16
Logging/Statistic Options
17
=========================
18
19
0. error
20
1. warning
21
2. info **(default)**
22
- 3. frame
23
- 4. debug
24
- 5. full
25
+ 3. debug
26
+ 4. full
27
28
.. option:: --no-progress
29
30
31
.. option:: --csv <filename>
32
33
Writes encoding results to a comma separated value log file. Creates
34
- the file if it doesnt already exist, else adds one line per run. if
35
- :option:`--log-level` is frame or above, it writes one line per
36
- frame. Default none
37
+ the file if it doesn't already exist. If :option:`--csv-log-level` is 0,
38
+ it adds one line per run. If :option:`--csv-log-level` is greater than
39
+ 0, it writes one line per frame. Default none
40
41
When frame level logging is enabled, several frame performance
42
statistics are listed:
43
44
enough ahead for the necessary reference data to be available. This
45
is more of a problem for P frames where some blocks are much more
46
expensive than others.
47
+
48
+ **CLI ONLY**
49
50
+.. option:: --csv-log-level <integer>
51
52
-.. option:: --cu-stats, --no-cu-stats
53
+ CSV logging level. Default 0
54
+ 0. summary
55
+ 1. frame level logging
56
+ 2. frame level logging with performance statistics
57
58
- Records statistics on how each CU was coded (split depths and other
59
- mode decisions) and reports those statistics at the end of the
60
- encode. Default disabled
61
+ **CLI ONLY**
62
63
.. option:: --ssim, --no-ssim
64
65
66
67
**CLI ONLY**
68
69
+.. option:: --total-frames <integer>
70
+
71
+ The number of frames intended to be encoded. It may be left
72
+ unspecified, but when it is specified rate control can make use of
73
+ this information. It is also used to determine if an encode is
74
+ actually a stillpicture profile encode (single frame)
75
+
76
.. option:: --dither
77
78
Enable high quality downscaling. Dithering is based on the diffusion
79
80
81
**Range of values:** positive int or float, or num/denom
82
83
-.. option:: --interlaceMode <false|tff|bff>, --no-interlaceMode
84
+.. option:: --interlace <false|tff|bff>, --no-interlace
85
86
0. progressive pictures **(default)**
87
1. top field first
88
89
90
**CLI ONLY**
91
92
-.. option:: --output-depth, -D 8|10
93
+.. option:: --output-depth, -D 8|10|12
94
95
Bitdepth of output HEVC bitstream, which is also the internal bit
96
depth of the encoder. If the requested bit depth is not the bit
97
depth of the linked libx265, it will attempt to bind libx265_main
98
- for an 8bit encoder, or libx265_main10 for a 10bit encoder, with the
99
+ for an 8bit encoder, libx265_main10 for a 10bit encoder, or
100
+ libx265_main12 for a 12bit encoder (EXPERIMENTAL), with the
101
same API version as the linked libx265.
102
103
+ If the output depth is not specified but :option:`--profile` is
104
+ specified, the output depth will be derived from the profile name.
105
+
106
**CLI ONLY**
107
108
Profile, Level, Tier
109
110
profile. May abort the encode if the specified profile is
111
impossible to be supported by the compile options chosen for the
112
encoder (a high bit depth encoder will be unable to output
113
- bitstreams compliant with Main or Mainstillpicture).
114
+ bitstreams compliant with Main or MainStillPicture).
115
+
116
+ The following profiles are supported in x265.
117
+
118
+ 8bit profiles::
119
+
120
+ main, main-intra, mainstillpicture (or msp for short)
121
+ main444-8 main444-intra main444-stillpicture
122
+ See note below on signaling intra and stillpicture profiles.
123
+
124
+ 10bit profiles::
125
+
126
+ main10, main10-intra
127
+ main422-10, main422-10-intra
128
+ main444-10, main444-10-intra
129
+
130
+ 12bit profiles::
131
+
132
+ main12, main12-intra
133
+ main422-12, main422-12-intra
134
+ main444-12, main444-12-intra
135
+
136
+
137
+ **CLI ONLY**
138
139
- API users must use x265_param_apply_profile() after configuring
140
+ API users must call x265_param_apply_profile() after configuring
141
their param structure. Any changes made to the param structure after
142
this call might make the encode non-compliant.
143
144
- **Values:** main, main10, mainstillpicture, main422-8, main422-10, main444-8, main444-10
145
+ The CLI application will derive the output bit depth from the
146
+ profile name if :option:`--output-depth` is not specified.
147
148
- **CLI ONLY**
149
+.. note::
150
+
151
+ All 12bit profiles are extremely unstable, do not use them yet.
152
+ 16bit is not supported at all, but those profiles are included
153
+ because it is possible for libx265 to make bitstreams compatible
154
+ with them.
155
156
.. option:: --level-idc <integer|float>
157
158
159
specified level, main tier first, turning on high tier only if
160
necessary and available at that level.
161
162
+ If :option:`--level-idc` has not been specified, this argument is
163
+ ignored.
164
+
165
.. option:: --ref <1..16>
166
167
Max number of L0 references to be allowed. This number has a linear
168
169
Default: disabled
170
171
.. note::
172
+
173
:option:`--profile`, :option:`--level-idc`, and
174
:option:`--high-tier` are only intended for use when you are
175
targeting a particular decoder (or decoders) with fixed resource
176
177
parameters to meet those requirements but it will never raise
178
them. It may enable VBV constraints on a CRF encode.
179
180
+ Also note that x265 determines the decoder requirement profile and
181
+ level in three steps. First, the user configures an x265_param
182
+ structure with their suggested encoder options and then optionally
183
+ calls x265_param_apply_profile() to enforce a specific profile
184
+ (main, main10, etc). Second, an encoder is created from this
185
+ x265_param instance and the :option:`--level-idc` and
186
+ :option:`--high-tier` parameters are used to reduce bitrate or other
187
+ features in order to enforce the target level. Finally, the encoder
188
+ re-examines the final set of parameters and detects the actual
189
+ minimum decoder requirement level and this is what is signaled in
190
+ the bitstream headers. The detected decoder level will only use High
191
+ tier if the user specified a High tier level.
192
+
193
+ The signaled profile will be determined by the encoder's internal
194
+ bitdepth and input color space. If :option:`--keyint` is 0 or 1,
195
+ then an intra variant of the profile will be signaled.
196
+
197
+ If :option:`--total-frames` is 1, then a stillpicture variant will
198
+ be signaled, but this parameter is not always set by applications,
199
+ particularly not when the CLI uses stdin streaming or when libx265
200
+ is used by third-party applications.
201
x265_1.7.tar.gz/doc/reST/presets.rst -> x265_1.8.tar.gz/doc/reST/presets.rst
Changed
18
1
2
~~~~~~~~~~~~~~~~~~~~
3
4
:option:`--tune` *grain* tries to improve the retention of film grain in
5
-the reconstructed output. It helps rate distortion optimizations select
6
-modes which preserve high frequency noise:
7
+the reconstructed output. It disables rate distortion optimizations in
8
+quantization, and increases the default psy-rd.
9
10
* :option:`--psy-rd` 0.5
11
- * :option:`--rdoq-level` 1
12
- * :option:`--psy-rdoq` 30
13
+ * :option:`--rdoq-level` 0
14
+ * :option:`--psy-rdoq` 0
15
16
It lowers the strength of adaptive quantization, so residual energy can
17
be more evenly distributed across the (noisy) picture:
18
x265_1.7.tar.gz/doc/reST/threading.rst -> x265_1.8.tar.gz/doc/reST/threading.rst
Changed
62
1
2
providers are recommended to call this method when they make new jobs
3
available.
4
5
-Worker jobs are not allowed to block except when abosultely necessary
6
+Worker jobs are not allowed to block except when absolutely necessary
7
for data locking. If a job becomes blocked, the work function is
8
expected to drop that job so the worker thread may go back to the pool
9
and find more work.
10
11
12
If a worker thread job has work which can be performed in parallel by
13
many threads, it may allocate a bonded task group and enlist the help of
14
-other idle worker threads in the same pool. Those threads will cooperate
15
-to complete the work of the bonded task group and then return to their
16
-idle states. The larger and more uniform those tasks are, the better the
17
-bonded task group will perform.
18
+other idle worker threads from the same thread pool. Those threads will
19
+cooperate to complete the work of the bonded task group and then return
20
+to their idle states. The larger and more uniform those tasks are, the
21
+better the bonded task group will perform.
22
23
Parallel Mode Analysis
24
~~~~~~~~~~~~~~~~~~~~~~
25
26
When :option:`--pmode` is enabled, each CU (at all depths from 64x64 to
27
8x8) will distribute its analysis work to the thread pool via a bonded
28
task group. Each analysis job will measure the cost of one prediction
29
-for the CU: merge, skip, intra, inter (2Nx2N, Nx2N, 2NxN, and AMP). At
30
-slower presets, the amount of increased parallelism is often enough to
31
-be able to reduce frame parallelism while achieving the same overall CPU
32
-utilization. Reducing frame threads is often beneficial to ABR and VBV
33
-rate control.
34
+for the CU: merge, skip, intra, inter (2Nx2N, Nx2N, 2NxN, and AMP).
35
+
36
+At slower presets, the amount of increased parallelism from pmode is
37
+often enough to be able to reduce or disable frame parallelism while
38
+achieving the same overall CPU utilization. Reducing frame threads is
39
+often beneficial to ABR and VBV rate control.
40
41
Parallel Motion Estimation
42
~~~~~~~~~~~~~~~~~~~~~~~~~~
43
44
When :option:`--pme` is enabled all of the analysis functions which
45
perform motion searches to reference frames will distribute those motion
46
-searches as jobs for worker threads via a bonded task group (if more
47
-than two motion searches are required).
48
+searches to other worker threads via a bonded task group (if more than
49
+two motion searches are required).
50
51
Frame Threading
52
===============
53
54
bonded task groups to measure single frame cost estimates using slices.
55
(see :option:`--lookahead-slices`)
56
57
-The function slicetypeDecide() itself is also be performed by a worker
58
+The main slicetypeDecide() function itself is also performed by a worker
59
thread if your encoder has a thread pool, else it runs within the
60
context of the thread which calls the x265_encoder_encode().
61
62
x265_1.7.tar.gz/source/CMakeLists.txt -> x265_1.8.tar.gz/source/CMakeLists.txt
Changed
185
1
2
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
3
4
# X265_BUILD must be incremented each time the public API is changed
5
-set(X265_BUILD 59)
6
+set(X265_BUILD 68)
7
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
8
"${PROJECT_BINARY_DIR}/x265.def")
9
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
10
11
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
12
set(X86_ALIASES x86 i386 i686 x86_64 amd64)
13
list(FIND X86_ALIASES "${SYSPROC}" X86MATCH)
14
+set(POWER_ALIASES ppc64 ppc64le)
15
+list(FIND POWER_ALIASES "${SYSPROC}" POWERMATCH)
16
if("${SYSPROC}" STREQUAL "" OR X86MATCH GREATER "-1")
17
message(STATUS "Detected x86 target processor")
18
set(X86 1)
19
20
set(X64 1)
21
add_definitions(-DX86_64=1)
22
endif()
23
+elseif(POWERMATCH GREATER "-1")
24
+ message(STATUS "Detected POWER target processor")
25
+ set(POWER 1)
26
+ add_definitions(-DX265_ARCH_POWER=1)
27
elseif(${SYSPROC} STREQUAL "armv6l")
28
message(STATUS "Detected ARM target processor")
29
set(ARM 1)
30
31
endif()
32
endif()
33
mark_as_advanced(LIBRT NUMA_FOUND)
34
+ option(NO_ATOMICS "Use a slow mutex to replace atomics" OFF)
35
+ if(NO_ATOMICS)
36
+ add_definitions(-DNO_ATOMICS=1)
37
+ endif(NO_ATOMICS)
38
endif(UNIX)
39
40
if(X64 AND NOT WIN32)
41
42
message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
43
option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
44
endif()
45
+else()
46
+ option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
47
endif()
48
49
option(CHECKED_BUILD "Enable run-time sanity checks (debugging)" OFF)
50
51
# Build options
52
set(LIB_INSTALL_DIR lib CACHE STRING "Install location of libraries")
53
set(BIN_INSTALL_DIR bin CACHE STRING "Install location of executables")
54
+set(EXTRA_LIB "" CACHE STRING "Extra libraries to link against")
55
+set(EXTRA_LINK_FLAGS "" CACHE STRING "Extra link flags")
56
+if(EXTRA_LINK_FLAGS)
57
+ list(APPEND LINKER_OPTIONS ${EXTRA_LINK_FLAGS})
58
+endif()
59
+if(EXTRA_LIB)
60
+ option(LINKED_8BIT "8bit libx265 is being linked with this library" OFF)
61
+ option(LINKED_10BIT "10bit libx265 is being linked with this library" OFF)
62
+ option(LINKED_12BIT "12bit libx265 is being linked with this library" OFF)
63
+endif(EXTRA_LIB)
64
+mark_as_advanced(EXTRA_LIB EXTRA_LINK_FLAGS)
65
66
if(X64)
67
- # NOTE: We only officially support 16bit-per-pixel compiles of x265
68
- # on 64bit architectures. 16bpp plus large resolution plus slow
69
+ # NOTE: We only officially support high-bit-depth compiles of x265
70
+ # on 64bit architectures. Main10 plus large resolution plus slow
71
# preset plus 32bit address space usually means malloc failure. You
72
# can disable this if(X64) check if you desperately need a 32bit
73
# build with 10bit/12bit support, but this violates the "shrink wrap
74
# license" so to speak. If it breaks you get to keep both halves.
75
- # You will likely need to compile without assembly
76
- option(HIGH_BIT_DEPTH "Store pixels as 16bit values" OFF)
77
+ # You will need to disable assembly manually.
78
+ option(HIGH_BIT_DEPTH "Store pixel samples as 16bit values (Main10/Main12)" OFF)
79
endif(X64)
80
if(HIGH_BIT_DEPTH)
81
- add_definitions(-DHIGH_BIT_DEPTH=1)
82
+ option(MAIN12 "Support Main12 instead of Main10" OFF)
83
+ if(MAIN12)
84
+ add_definitions(-DHIGH_BIT_DEPTH=1 -DX265_DEPTH=12)
85
+ else()
86
+ add_definitions(-DHIGH_BIT_DEPTH=1 -DX265_DEPTH=10)
87
+ endif()
88
else(HIGH_BIT_DEPTH)
89
- add_definitions(-DHIGH_BIT_DEPTH=0)
90
+ add_definitions(-DHIGH_BIT_DEPTH=0 -DX265_DEPTH=8)
91
endif(HIGH_BIT_DEPTH)
92
93
+# this option can only be used when linking multiple libx265 libraries
94
+# together, and some alternate API access method is implemented.
95
+option(EXPORT_C_API "Implement public C programming interface" ON)
96
+mark_as_advanced(EXPORT_C_API)
97
+if(EXPORT_C_API)
98
+ set(X265_NS x265)
99
+ add_definitions(-DEXPORT_C_API=1)
100
+elseif(HIGH_BIT_DEPTH)
101
+ if(MAIN12)
102
+ set(X265_NS x265_12bit)
103
+ else()
104
+ set(X265_NS x265_10bit)
105
+ endif()
106
+ add_definitions(-DEXPORT_C_API=0)
107
+else()
108
+ set(X265_NS x265_8bit)
109
+ add_definitions(-DEXPORT_C_API=0)
110
+endif()
111
+add_definitions(-DX265_NS=${X265_NS})
112
+
113
option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
114
if(WARNINGS_AS_ERRORS)
115
if(GCC)
116
117
if(NOT MSVC)
118
set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
119
endif()
120
+if(EXTRA_LIB)
121
+ target_link_libraries(x265-static ${EXTRA_LIB})
122
+endif()
123
install(TARGETS x265-static
124
LIBRARY DESTINATION ${LIB_INSTALL_DIR}
125
ARCHIVE DESTINATION ${LIB_INSTALL_DIR})
126
127
if(APPLE)
128
set_target_properties(x265-shared PROPERTIES MACOSX_RPATH 1)
129
else()
130
- set_target_properties(x265-shared PROPERTIES LINK_FLAGS "-Wl,-Bsymbolic,-znoexecstack")
131
+ list(APPEND LINKER_OPTIONS "-Wl,-Bsymbolic,-znoexecstack")
132
endif()
133
endif()
134
set_target_properties(x265-shared PROPERTIES SOVERSION ${X265_BUILD})
135
136
ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
137
RUNTIME DESTINATION ${BIN_INSTALL_DIR})
138
endif()
139
+ if(EXTRA_LIB)
140
+ target_link_libraries(x265-shared ${EXTRA_LIB})
141
+ endif()
142
if(LINKER_OPTIONS)
143
# set_target_properties can't do list expansion
144
string(REPLACE ";" " " LINKER_OPTION_STR "${LINKER_OPTIONS}")
145
146
endif()
147
148
# Main CLI application
149
-option(ENABLE_CLI "Build standalone CLI application" ON)
150
+set(ENABLE_CLI ON CACHE BOOL "Build standalone CLI application")
151
if(ENABLE_CLI)
152
file(GLOB InputFiles input/input.cpp input/yuv.cpp input/y4m.cpp input/*.h)
153
file(GLOB OutputFiles output/output.cpp output/reconplay.cpp output/*.h
154
output/yuv.cpp output/y4m.cpp # recon
155
output/raw.cpp) # muxers
156
- file(GLOB FilterFiles filters/*.cpp filters/*.h)
157
source_group(input FILES ${InputFiles})
158
source_group(output FILES ${OutputFiles})
159
- source_group(filters FILES ${FilterFiles})
160
161
check_include_files(getopt.h HAVE_GETOPT_H)
162
if(NOT HAVE_GETOPT_H)
163
164
include_directories(compat/getopt)
165
set(GETOPT compat/getopt/getopt.c compat/getopt/getopt.h)
166
endif(NOT HAVE_GETOPT_H)
167
+ if(WIN32)
168
+ set(ExportDefs "${PROJECT_BINARY_DIR}/x265.def")
169
+ endif(WIN32)
170
171
if(XCODE)
172
# Xcode seems unable to link the CLI with libs, so link as one target
173
- add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${FilterFiles} ${GETOPT} x265.cpp x265.h x265cli.h
174
- $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
175
+ add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
176
+ x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp
177
+ $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
178
else()
179
- add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${FilterFiles} ${GETOPT} ${X265_RC_FILE} x265.cpp x265.h x265cli.h)
180
+ add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
181
+ ${ExportDefs} x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp)
182
if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX)
183
# The CLI cannot link to the shared library on Windows, it
184
# requires internal APIs not exported from the DLL
185
x265_1.7.tar.gz/source/cmake/CMakeASM_YASMInformation.cmake -> x265_1.8.tar.gz/source/cmake/CMakeASM_YASMInformation.cmake
Changed
17
1
2
endif()
3
4
if(HIGH_BIT_DEPTH)
5
- list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10)
6
+ if(MAIN12)
7
+ list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS})
8
+ else()
9
+ list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -DX265_NS=${X265_NS})
10
+ endif()
11
else()
12
- list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8)
13
+ list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS})
14
endif()
15
16
list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS}")
17
x265_1.7.tar.gz/source/cmake/FindYasm.cmake -> x265_1.8.tar.gz/source/cmake/FindYasm.cmake
Changed
10
1
2
3
# Simple path search with YASM_ROOT environment variable override
4
find_program(YASM_EXECUTABLE
5
- NAMES yasm yasm-1.2.0-win32 yasm-1.2.0-win64
6
+ NAMES yasm yasm-1.2.0-win32 yasm-1.2.0-win64 yasm yasm-1.3.0-win32 yasm-1.3.0-win64
7
HINTS $ENV{YASM_ROOT} ${YASM_ROOT}
8
PATH_SUFFIXES bin
9
)
10
x265_1.7.tar.gz/source/common/CMakeLists.txt -> x265_1.8.tar.gz/source/common/CMakeLists.txt
Changed
56
1
2
# vim: syntax=cmake
3
4
+list(APPEND VFLAGS "-DX265_VERSION=${X265_VERSION}")
5
+if(EXTRA_LIB)
6
+ if(LINKED_8BIT)
7
+ list(APPEND VFLAGS "-DLINKED_8BIT=1")
8
+ endif(LINKED_8BIT)
9
+ if(LINKED_10BIT)
10
+ list(APPEND VFLAGS "-DLINKED_10BIT=1")
11
+ endif(LINKED_10BIT)
12
+ if(LINKED_12BIT)
13
+ list(APPEND VFLAGS "-DLINKED_12BIT=1")
14
+ endif(LINKED_12BIT)
15
+endif(EXTRA_LIB)
16
+
17
if(ENABLE_ASSEMBLY)
18
set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
19
+ list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1")
20
21
set(SSE3 vec/dct-sse3.cpp)
22
set(SSSE3 vec/dct-ssse3.cpp)
23
24
mc-a2.asm pixel-util8.asm blockcopy8.asm
25
pixeladd8.asm dct8.asm)
26
if(HIGH_BIT_DEPTH)
27
- set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm)
28
+ set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm loopfilter.asm)
29
else()
30
set(A_SRCS ${A_SRCS} sad-a.asm intrapred8.asm intrapred8_allangs.asm ipfilter8.asm loopfilter.asm)
31
endif()
32
33
source_group(Assembly FILES ${ASM_PRIMITIVES})
34
endif(ENABLE_ASSEMBLY)
35
36
+# set_target_properties can't do list expansion
37
+string(REPLACE ";" " " VERSION_FLAGS "${VFLAGS}")
38
+set_source_files_properties(version.cpp PROPERTIES COMPILE_FLAGS ${VERSION_FLAGS})
39
+
40
check_symbol_exists(strtok_r "string.h" HAVE_STRTOK_R)
41
if(HAVE_STRTOK_R)
42
set_source_files_properties(param.cpp PROPERTIES COMPILE_FLAGS -DHAVE_STRTOK_R=1)
43
44
set(WINXP winxp.h winxp.cpp)
45
endif(WIN32)
46
47
-set_source_files_properties(version.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION})
48
-
49
add_library(common OBJECT
50
- ${ASM_PRIMITIVES} ${VEC_PRIMITIVES}
51
- ${LIBCOMMON_SRC} ${LIBCOMMON_HDR} ${WINXP}
52
+ ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${WINXP}
53
primitives.cpp primitives.h
54
pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
55
constants.cpp constants.h
56
x265_1.7.tar.gz/source/common/bitstream.cpp -> x265_1.8.tar.gz/source/common/bitstream.cpp
Changed
10
1
2
#include "common.h"
3
#include "bitstream.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#if defined(_MSC_VER)
9
#pragma warning(disable: 4244)
10
x265_1.7.tar.gz/source/common/bitstream.h -> x265_1.8.tar.gz/source/common/bitstream.h
Changed
10
1
2
#ifndef X265_BITSTREAM_H
3
#define X265_BITSTREAM_H 1
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class BitInterface
10
x265_1.7.tar.gz/source/common/common.cpp -> x265_1.8.tar.gz/source/common/common.cpp
Changed
25
1
2
#include <sys/time.h>
3
#endif
4
5
+namespace X265_NS {
6
+
7
#if CHECKED_BUILD || _DEBUG
8
int g_checkFailures;
9
#endif
10
11
#endif
12
}
13
14
-using namespace x265;
15
-
16
#define X265_ALIGNBYTES 32
17
18
#if _WIN32
19
20
fclose(fh);
21
return NULL;
22
}
23
+
24
+}
25
x265_1.7.tar.gz/source/common/common.h -> x265_1.8.tar.gz/source/common/common.h
Changed
66
1
2
/* If compiled with CHECKED_BUILD perform run-time checks and log any that
3
* fail, both to stderr and to a file */
4
#if CHECKED_BUILD || _DEBUG
5
-extern int g_checkFailures;
6
+namespace X265_NS { extern int g_checkFailures; }
7
#define X265_CHECK(expr, ...) if (!(expr)) { \
8
x265_log(NULL, X265_LOG_ERROR, __VA_ARGS__); \
9
FILE *fp = fopen("x265_check_failures.txt", "a"); \
10
11
typedef uint64_t sum2_t;
12
typedef uint64_t pixel4;
13
typedef int64_t ssum2_t;
14
-#define X265_DEPTH 10 // compile time configurable bit depth
15
#else
16
typedef uint8_t pixel;
17
typedef uint16_t sum_t;
18
typedef uint32_t sum2_t;
19
typedef uint32_t pixel4;
20
-typedef int32_t ssum2_t; //Signed sum
21
-#define X265_DEPTH 8 // compile time configurable bit depth
22
+typedef int32_t ssum2_t; // Signed sum
23
#endif // if HIGH_BIT_DEPTH
24
25
+#if X265_DEPTH <= 10
26
+typedef uint32_t sse_ret_t;
27
+#else
28
+typedef uint64_t sse_ret_t;
29
+#endif
30
+
31
#ifndef NULL
32
#define NULL 0
33
#endif
34
35
#define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
36
#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
37
38
-namespace x265 {
39
+namespace X265_NS {
40
41
enum { SAO_NUM_OFFSET = 4 };
42
43
44
/* located in pixel.cpp */
45
void extendPicBorder(pixel* recon, intptr_t stride, int width, int height, int marginX, int marginY);
46
47
-}
48
-
49
-/* outside x265 namespace, but prefixed. defined in common.cpp */
50
+/* located in common.cpp */
51
int64_t x265_mdate(void);
52
#define x265_log(param, ...) general_log(param, "x265", __VA_ARGS__)
53
void general_log(const x265_param* param, const char* caller, int level, const char* fmt, ...);
54
55
void x265_free(void *ptr);
56
char* x265_slurp_file(const char *filename);
57
58
-void x265_setup_primitives(x265_param* param, int cpu); /* primitives.cpp */
59
+/* located in primitives.cpp */
60
+void x265_setup_primitives(x265_param* param);
61
+void x265_report_simd(x265_param* param);
62
+}
63
64
#include "constants.h"
65
66
x265_1.7.tar.gz/source/common/constants.cpp -> x265_1.8.tar.gz/source/common/constants.cpp
Changed
69
1
2
#include "constants.h"
3
#include "threading.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
+
8
+#if X265_DEPTH == 12
9
+
10
+// lambda = pow(2, (double)q / 6 - 2) * (1 << (12 - 8));
11
+double x265_lambda_tab[QP_MAX_MAX + 1] =
12
+{
13
+ 4.0000, 4.4898, 5.0397, 5.6569, 6.3496,
14
+ 7.1272, 8.0000, 8.9797, 10.0794, 11.3137,
15
+ 12.6992, 14.2544, 16.0000, 17.9594, 20.1587,
16
+ 22.6274, 25.3984, 28.5088, 32.0000, 35.9188,
17
+ 40.3175, 45.2548, 50.7968, 57.0175, 64.0000,
18
+ 71.8376, 80.6349, 90.5097, 101.5937, 114.0350,
19
+ 128.0000, 143.6751, 161.2699, 181.0193, 203.1873,
20
+ 228.0701, 256.0000, 287.3503, 322.5398, 362.0387,
21
+ 406.3747, 456.1401, 512.0000, 574.7006, 645.0796,
22
+ 724.0773, 812.7493, 912.2803, 1024.0000, 1149.4011,
23
+ 1290.1592, 1448.1547, 1625.4987, 1824.5606, 2048.0000,
24
+ 2298.8023, 2580.3183, 2896.3094, 3250.9974, 3649.1211,
25
+ 4096.0000, 4597.6045, 5160.6366, 5792.6188, 6501.9947,
26
+ 7298.2423, 8192.0000, 9195.2091, 10321.2732, 11585.2375
27
+};
28
+
29
+// lambda2 = pow(lambda, 2) * scale (0.85);
30
+double x265_lambda2_tab[QP_MAX_MAX + 1] =
31
+{
32
+ 13.6000, 17.1349, 21.5887, 27.2000, 34.2699,
33
+ 43.1773, 54.4000, 68.5397, 86.3546, 108.8000,
34
+ 137.0794, 172.7092, 217.6000, 274.1588, 345.4185,
35
+ 435.2000, 548.3176, 690.8369, 870.4000, 1096.6353,
36
+ 1381.6739, 1740.8000, 2193.2706, 2763.3478, 3481.6000,
37
+ 4386.5411, 5526.6955, 6963.2000, 8773.0822, 11053.3910,
38
+ 13926.4000, 17546.1645, 22106.7819, 27852.8000, 35092.3291,
39
+ 44213.5641, 55705.6000, 70184.6579, 88427.1282, 111411.2000,
40
+ 140369.3159, 176854.2563, 222822.4000, 280738.6324, 353708.5127,
41
+ 445644.8001, 561477.2648, 707417.0237, 891289.6000, 1122954.5277,
42
+ 1414834.0484, 1782579.2003, 2245909.0566, 2829668.0981, 3565158.4000,
43
+ 4491818.1146, 5659336.1938, 7130316.8013, 8983636.2264, 11318672.3923,
44
+ 14260633.6000, 17967272.4585, 22637344.7751, 28521267.1953, 35934544.9165,
45
+ 45274689.5567, 57042534.4000, 71869089.8338, 90549379.1181, 114085068.8008
46
+};
47
+
48
+#elif X265_DEPTH == 10
49
50
-#if HIGH_BIT_DEPTH
51
// lambda = pow(2, (double)q / 6 - 2) * (1 << (X265_DEPTH - 8));
52
double x265_lambda_tab[QP_MAX_MAX + 1] =
53
{
54
55
4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, 36, 44, 52, 60, 37, 45, 53, 61, 38, 46, 54, 62, 39, 47, 55, 63 }
56
};
57
58
-ALIGN_VAR_16(const uint16_t, g_scan4x4[NUM_SCAN_TYPE][4 * 4]) =
59
+ALIGN_VAR_16(const uint16_t, g_scan4x4[NUM_SCAN_TYPE + 1][4 * 4]) =
60
{
61
{ 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15 },
62
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
63
- { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }
64
+ { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 },
65
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
66
};
67
68
const uint16_t g_scan16x16[16 * 16] =
69
x265_1.7.tar.gz/source/common/constants.h -> x265_1.8.tar.gz/source/common/constants.h
Changed
19
1
2
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
extern int g_ctuSizeConfigured;
10
11
extern const uint16_t* const g_scanOrder[NUM_SCAN_TYPE][NUM_SCAN_SIZE];
12
extern const uint16_t* const g_scanOrderCG[NUM_SCAN_TYPE][NUM_SCAN_SIZE];
13
extern const uint16_t g_scan8x8diag[8 * 8];
14
-extern const uint16_t g_scan4x4[NUM_SCAN_TYPE][4 * 4];
15
+extern const uint16_t g_scan4x4[NUM_SCAN_TYPE + 1][4 * 4]; // +1 row of safe padding for the optimized codeCoeffNxN assembly, which may read up to 15 bytes beyond the table bounds
16
17
extern const uint8_t g_lastCoeffTable[32];
18
extern const uint8_t g_goRiceRange[5]; // maximum value coded with Rice codes
19
x265_1.7.tar.gz/source/common/contexts.h -> x265_1.8.tar.gz/source/common/contexts.h
Changed
16
1
2
#define OFF_TQUANT_BYPASS_FLAG_CTX (OFF_TRANSFORMSKIP_FLAG_CTX + 2 * NUM_TRANSFORMSKIP_FLAG_CTX)
3
#define MAX_OFF_CTX_MOD (OFF_TQUANT_BYPASS_FLAG_CTX + NUM_TQUANT_BYPASS_FLAG_CTX)
4
5
-namespace x265 {
6
+extern "C" const uint32_t PFX(entropyStateBits)[128];
7
+
8
+namespace X265_NS {
9
// private namespace
10
11
extern const uint32_t g_entropyBits[128];
12
-extern const uint32_t g_entropyStateBits[128];
13
extern const uint8_t g_nextState[128][2];
14
15
#define sbacGetMps(S) ((S) & 1)
16
x265_1.7.tar.gz/source/common/cpu.cpp -> x265_1.8.tar.gz/source/common/cpu.cpp
Changed
137
1
2
3
#endif // if X265_ARCH_ARM
4
5
-namespace x265 {
6
+namespace X265_NS {
7
const cpu_name_t cpu_names[] =
8
{
9
#if X265_ARCH_X86
10
11
12
extern "C" {
13
/* cpu-a.asm */
14
-int x265_cpu_cpuid_test(void);
15
-void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
16
-void x265_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx);
17
+int PFX(cpu_cpuid_test)(void);
18
+void PFX(cpu_cpuid)(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
19
+void PFX(cpu_xgetbv)(uint32_t op, uint32_t *eax, uint32_t *edx);
20
}
21
22
#if defined(_MSC_VER)
23
24
uint32_t max_extended_cap, max_basic_cap;
25
26
#if !X86_64
27
- if (!x265_cpu_cpuid_test())
28
+ if (!PFX(cpu_cpuid_test)())
29
return 0;
30
#endif
31
32
- x265_cpu_cpuid(0, &eax, vendor + 0, vendor + 2, vendor + 1);
33
+ PFX(cpu_cpuid)(0, &eax, vendor + 0, vendor + 2, vendor + 1);
34
max_basic_cap = eax;
35
if (max_basic_cap == 0)
36
return 0;
37
38
- x265_cpu_cpuid(1, &eax, &ebx, &ecx, &edx);
39
+ PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
40
if (edx & 0x00800000)
41
cpu |= X265_CPU_MMX;
42
else
43
44
if ((ecx & 0x18000000) == 0x18000000)
45
{
46
/* Check for OS support */
47
- x265_cpu_xgetbv(0, &eax, &edx);
48
+ PFX(cpu_xgetbv)(0, &eax, &edx);
49
if ((eax & 0x6) == 0x6)
50
{
51
cpu |= X265_CPU_AVX;
52
53
54
if (max_basic_cap >= 7)
55
{
56
- x265_cpu_cpuid(7, &eax, &ebx, &ecx, &edx);
57
+ PFX(cpu_cpuid)(7, &eax, &ebx, &ecx, &edx);
58
/* AVX2 requires OS support, but BMI1/2 don't. */
59
if ((cpu & X265_CPU_AVX) && (ebx & 0x00000020))
60
cpu |= X265_CPU_AVX2;
61
62
if (cpu & X265_CPU_SSSE3)
63
cpu |= X265_CPU_SSE2_IS_FAST;
64
65
- x265_cpu_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
66
+ PFX(cpu_cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
67
max_extended_cap = eax;
68
69
if (max_extended_cap >= 0x80000001)
70
{
71
- x265_cpu_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
72
+ PFX(cpu_cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
73
74
if (ecx & 0x00000020)
75
cpu |= X265_CPU_LZCNT; /* Supported by Intel chips starting with Haswell */
76
77
78
if (!strcmp((char*)vendor, "GenuineIntel"))
79
{
80
- x265_cpu_cpuid(1, &eax, &ebx, &ecx, &edx);
81
+ PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
82
int family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
83
int model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
84
if (family == 6)
85
86
if ((!strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead")) && !(cpu & X265_CPU_SSE42))
87
{
88
/* cacheline size is specified in 3 places, any of which may be missing */
89
- x265_cpu_cpuid(1, &eax, &ebx, &ecx, &edx);
90
+ PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
91
int cache = (ebx & 0xff00) >> 5; // cflush size
92
if (!cache && max_extended_cap >= 0x80000006)
93
{
94
- x265_cpu_cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
95
+ PFX(cpu_cpuid)(0x80000006, &eax, &ebx, &ecx, &edx);
96
cache = ecx & 0xff; // cacheline size
97
}
98
if (!cache && max_basic_cap >= 2)
99
100
int max, i = 0;
101
do
102
{
103
- x265_cpu_cpuid(2, buf + 0, buf + 1, buf + 2, buf + 3);
104
+ PFX(cpu_cpuid)(2, buf + 0, buf + 1, buf + 2, buf + 3);
105
max = buf[0] & 0xff;
106
buf[0] &= ~0xff;
107
for (int j = 0; j < 4; j++)
108
109
#elif X265_ARCH_ARM
110
111
extern "C" {
112
-void x265_cpu_neon_test(void);
113
-int x265_cpu_fast_neon_mrc_test(void);
114
+void PFX(cpu_neon_test)(void);
115
+int PFX(cpu_fast_neon_mrc_test)(void);
116
}
117
118
uint32_t cpu_detect(void)
119
120
}
121
122
canjump = 1;
123
- x265_cpu_neon_test();
124
+ PFX(cpu_neon_test)();
125
canjump = 0;
126
signal(SIGILL, oldsig);
127
#endif // if !HAVE_NEON
128
129
// which may result in incorrect detection and the counters stuck enabled.
130
// right now Apple does not seem to support performance counters for this test
131
#ifndef __MACH__
132
- flags |= x265_cpu_fast_neon_mrc_test() ? X265_CPU_FAST_NEON_MRC : 0;
133
+ flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
134
#endif
135
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
136
#endif // if HAVE_ARMV6
137
x265_1.7.tar.gz/source/common/cpu.h -> x265_1.8.tar.gz/source/common/cpu.h
Changed
37
1
2
3
#include "common.h"
4
5
+/* All assembly functions are prefixed with X265_NS (macro expanded) */
6
+#define PFX3(prefix, name) prefix ## _ ## name
7
+#define PFX2(prefix, name) PFX3(prefix, name)
8
+#define PFX(name) PFX2(X265_NS, name)
9
+
10
// from cpu-a.asm, if ASM primitives are compiled, else primitives.cpp
11
-extern "C" void x265_cpu_emms(void);
12
-extern "C" void x265_safe_intel_cpu_indicator_init(void);
13
+extern "C" void PFX(cpu_emms)(void);
14
+extern "C" void PFX(safe_intel_cpu_indicator_init)(void);
15
16
#if _MSC_VER && _WIN64
17
-#define x265_emms() x265_cpu_emms()
18
+#define x265_emms() PFX(cpu_emms)()
19
#elif _MSC_VER
20
#include <mmintrin.h>
21
#define x265_emms() _mm_empty()
22
#elif __GNUC__
23
// Cannot use _mm_empty() directly without compiling all the source with
24
// a fixed CPU arch, which we would like to avoid at the moment
25
-#define x265_emms() x265_cpu_emms()
26
+#define x265_emms() PFX(cpu_emms)()
27
#else
28
-#define x265_emms() x265_cpu_emms()
29
+#define x265_emms() PFX(cpu_emms)()
30
#endif
31
32
-namespace x265 {
33
+namespace X265_NS {
34
uint32_t cpu_detect(void);
35
36
struct cpu_name_t
37
x265_1.7.tar.gz/source/common/cudata.cpp -> x265_1.8.tar.gz/source/common/cudata.cpp
Changed
92
1
2
#include "mv.h"
3
#include "cudata.h"
4
5
-using namespace x265;
6
-
7
-namespace {
8
-// file private namespace
9
+using namespace X265_NS;
10
11
/* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
12
13
-void bcast1(uint8_t* dst, uint8_t val) { dst[0] = val; }
14
+static void bcast1(uint8_t* dst, uint8_t val) { dst[0] = val; }
15
16
-void copy4(uint8_t* dst, uint8_t* src) { ((uint32_t*)dst)[0] = ((uint32_t*)src)[0]; }
17
-void bcast4(uint8_t* dst, uint8_t val) { ((uint32_t*)dst)[0] = 0x01010101u * val; }
18
+static void copy4(uint8_t* dst, uint8_t* src) { ((uint32_t*)dst)[0] = ((uint32_t*)src)[0]; }
19
+static void bcast4(uint8_t* dst, uint8_t val) { ((uint32_t*)dst)[0] = 0x01010101u * val; }
20
21
-void copy16(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1]; }
22
-void bcast16(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val; ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; }
23
+static void copy16(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1]; }
24
+static void bcast16(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val; ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; }
25
26
-void copy64(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1];
27
- ((uint64_t*)dst)[2] = ((uint64_t*)src)[2]; ((uint64_t*)dst)[3] = ((uint64_t*)src)[3];
28
- ((uint64_t*)dst)[4] = ((uint64_t*)src)[4]; ((uint64_t*)dst)[5] = ((uint64_t*)src)[5];
29
- ((uint64_t*)dst)[6] = ((uint64_t*)src)[6]; ((uint64_t*)dst)[7] = ((uint64_t*)src)[7]; }
30
-void bcast64(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val;
31
- ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; ((uint64_t*)dst)[2] = bval; ((uint64_t*)dst)[3] = bval;
32
- ((uint64_t*)dst)[4] = bval; ((uint64_t*)dst)[5] = bval; ((uint64_t*)dst)[6] = bval; ((uint64_t*)dst)[7] = bval; }
33
+static void copy64(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1];
34
+ ((uint64_t*)dst)[2] = ((uint64_t*)src)[2]; ((uint64_t*)dst)[3] = ((uint64_t*)src)[3];
35
+ ((uint64_t*)dst)[4] = ((uint64_t*)src)[4]; ((uint64_t*)dst)[5] = ((uint64_t*)src)[5];
36
+ ((uint64_t*)dst)[6] = ((uint64_t*)src)[6]; ((uint64_t*)dst)[7] = ((uint64_t*)src)[7]; }
37
+static void bcast64(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val;
38
+ ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; ((uint64_t*)dst)[2] = bval; ((uint64_t*)dst)[3] = bval;
39
+ ((uint64_t*)dst)[4] = bval; ((uint64_t*)dst)[5] = bval; ((uint64_t*)dst)[6] = bval; ((uint64_t*)dst)[7] = bval; }
40
41
/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
42
* but hand-written assembly would beat it. */
43
-void copy256(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 256); }
44
-void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); }
45
+static void copy256(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 256); }
46
+static void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); }
47
+
48
+namespace {
49
+// file private namespace
50
51
/* Check whether 2 addresses point to the same column */
52
inline bool isEqualCol(int addrA, int addrB, int numUnits)
53
54
return MV((int16_t)mvx, (int16_t)mvy);
55
}
56
57
-// Partition table.
58
-// First index is partitioning mode. Second index is partition index.
59
-// Third index is 0 for partition sizes, 1 for partition offsets. The
60
-// sizes and offsets are encoded as two packed 4-bit values (X,Y).
61
-// X and Y represent 1/4 fractions of the block size.
62
-const uint32_t partTable[8][4][2] =
63
-{
64
- // XY
65
- { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2Nx2N.
66
- { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxN.
67
- { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_Nx2N.
68
- { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } }, // SIZE_NxN.
69
- { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnU.
70
- { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnD.
71
- { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_nLx2N.
72
- { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } } // SIZE_nRx2N.
73
-};
74
-
75
-// Partition Address table.
76
-// First index is partitioning mode. Second index is partition address.
77
-const uint32_t partAddrTable[8][4] =
78
-{
79
- { 0x00, 0x00, 0x00, 0x00 }, // SIZE_2Nx2N.
80
- { 0x00, 0x08, 0x08, 0x08 }, // SIZE_2NxN.
81
- { 0x00, 0x04, 0x04, 0x04 }, // SIZE_Nx2N.
82
- { 0x00, 0x04, 0x08, 0x0C }, // SIZE_NxN.
83
- { 0x00, 0x02, 0x02, 0x02 }, // SIZE_2NxnU.
84
- { 0x00, 0x0A, 0x0A, 0x0A }, // SIZE_2NxnD.
85
- { 0x00, 0x01, 0x01, 0x01 }, // SIZE_nLx2N.
86
- { 0x00, 0x05, 0x05, 0x05 } // SIZE_nRx2N.
87
-};
88
-
89
}
90
91
cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL };
92
x265_1.7.tar.gz/source/common/cudata.h -> x265_1.8.tar.gz/source/common/cudata.h
Changed
62
1
2
#include "slice.h"
3
#include "mv.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class FrameData;
10
11
// Partition count table, index represents partitioning mode.
12
const uint32_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 };
13
14
+// Partition table.
15
+// First index is partitioning mode. Second index is partition index.
16
+// Third index is 0 for partition sizes, 1 for partition offsets. The
17
+// sizes and offsets are encoded as two packed 4-bit values (X,Y).
18
+// X and Y represent 1/4 fractions of the block size.
19
+const uint32_t partTable[8][4][2] =
20
+{
21
+ // XY
22
+ { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2Nx2N.
23
+ { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxN.
24
+ { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_Nx2N.
25
+ { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } }, // SIZE_NxN.
26
+ { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnU.
27
+ { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnD.
28
+ { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_nLx2N.
29
+ { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } } // SIZE_nRx2N.
30
+};
31
+
32
+// Partition Address table.
33
+// First index is partitioning mode. Second index is partition address.
34
+const uint32_t partAddrTable[8][4] =
35
+{
36
+ { 0x00, 0x00, 0x00, 0x00 }, // SIZE_2Nx2N.
37
+ { 0x00, 0x08, 0x08, 0x08 }, // SIZE_2NxN.
38
+ { 0x00, 0x04, 0x04, 0x04 }, // SIZE_Nx2N.
39
+ { 0x00, 0x04, 0x08, 0x0C }, // SIZE_NxN.
40
+ { 0x00, 0x02, 0x02, 0x02 }, // SIZE_2NxnU.
41
+ { 0x00, 0x0A, 0x0A, 0x0A }, // SIZE_2NxnD.
42
+ { 0x00, 0x01, 0x01, 0x01 }, // SIZE_nLx2N.
43
+ { 0x00, 0x05, 0x05, 0x05 } // SIZE_nRx2N.
44
+};
45
+
46
// Holds part data for a CU of a given size, from an 8x8 CU to a CTU
47
class CUData
48
{
49
50
void getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const;
51
void getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
52
void getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
53
+ uint32_t getBestRefIdx(uint32_t subPartIdx) const { return ((m_interDir[subPartIdx] & 1) << m_refIdx[0][subPartIdx]) |
54
+ (((m_interDir[subPartIdx] >> 1) & 1) << (m_refIdx[1][subPartIdx] + 16)); }
55
+ uint32_t getPUOffset(uint32_t puIdx, uint32_t absPartIdx) const { return (partAddrTable[(int)m_partSize[absPartIdx]][puIdx] << (g_unitSizeDepth - m_cuDepth[absPartIdx]) * 2) >> 4; }
56
57
- uint32_t getNumPartInter() const { return nbPartsTable[(int)m_partSize[0]]; }
58
+ uint32_t getNumPartInter(uint32_t absPartIdx) const { return nbPartsTable[(int)m_partSize[absPartIdx]]; }
59
bool isIntra(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_INTRA; }
60
bool isInter(uint32_t absPartIdx) const { return !!(m_predMode[absPartIdx] & MODE_INTER); }
61
bool isSkipped(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_SKIP; }
62
x265_1.7.tar.gz/source/common/dct.cpp -> x265_1.8.tar.gz/source/common/dct.cpp
Changed
201
1
2
3
#include "common.h"
4
#include "primitives.h"
5
+#include "contexts.h" // costCoeffNxN_c
6
+#include "threading.h" // CLZ
7
8
-using namespace x265;
9
+using namespace X265_NS;
10
11
#if _MSC_VER
12
#pragma warning(disable: 4127) // conditional expression is constant, typical for templated functions
13
#endif
14
15
-namespace {
16
-// anonymous file-static namespace
17
-
18
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
19
// give identical results
20
-void fastForwardDst(const int16_t* block, int16_t* coeff, int shift) // input block, output coeff
21
+static void fastForwardDst(const int16_t* block, int16_t* coeff, int shift) // input block, output coeff
22
{
23
int c[4];
24
int rnd_factor = 1 << (shift - 1);
25
26
}
27
}
28
29
-void inversedst(const int16_t* tmp, int16_t* block, int shift) // input tmp, output block
30
+static void inversedst(const int16_t* tmp, int16_t* block, int shift) // input tmp, output block
31
{
32
int i, c[4];
33
int rnd_factor = 1 << (shift - 1);
34
35
}
36
}
37
38
-void partialButterfly16(const int16_t* src, int16_t* dst, int shift, int line)
39
+static void partialButterfly16(const int16_t* src, int16_t* dst, int shift, int line)
40
{
41
int j, k;
42
int E[8], O[8];
43
44
}
45
}
46
47
-void partialButterfly32(const int16_t* src, int16_t* dst, int shift, int line)
48
+static void partialButterfly32(const int16_t* src, int16_t* dst, int shift, int line)
49
{
50
int j, k;
51
int E[16], O[16];
52
53
}
54
}
55
56
-void partialButterfly8(const int16_t* src, int16_t* dst, int shift, int line)
57
+static void partialButterfly8(const int16_t* src, int16_t* dst, int shift, int line)
58
{
59
int j, k;
60
int E[4], O[4];
61
62
}
63
}
64
65
-void partialButterflyInverse4(const int16_t* src, int16_t* dst, int shift, int line)
66
+static void partialButterflyInverse4(const int16_t* src, int16_t* dst, int shift, int line)
67
{
68
int j;
69
int E[2], O[2];
70
71
}
72
}
73
74
-void partialButterflyInverse8(const int16_t* src, int16_t* dst, int shift, int line)
75
+static void partialButterflyInverse8(const int16_t* src, int16_t* dst, int shift, int line)
76
{
77
int j, k;
78
int E[4], O[4];
79
80
}
81
}
82
83
-void partialButterflyInverse16(const int16_t* src, int16_t* dst, int shift, int line)
84
+static void partialButterflyInverse16(const int16_t* src, int16_t* dst, int shift, int line)
85
{
86
int j, k;
87
int E[8], O[8];
88
89
}
90
}
91
92
-void partialButterflyInverse32(const int16_t* src, int16_t* dst, int shift, int line)
93
+static void partialButterflyInverse32(const int16_t* src, int16_t* dst, int shift, int line)
94
{
95
int j, k;
96
int E[16], O[16];
97
98
}
99
}
100
101
-void partialButterfly4(const int16_t* src, int16_t* dst, int shift, int line)
102
+static void partialButterfly4(const int16_t* src, int16_t* dst, int shift, int line)
103
{
104
int j;
105
int E[2], O[2];
106
107
}
108
}
109
110
-void dst4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
111
+static void dst4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
112
{
113
const int shift_1st = 1 + X265_DEPTH - 8;
114
const int shift_2nd = 8;
115
116
fastForwardDst(coef, dst, shift_2nd);
117
}
118
119
-void dct4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
120
+static void dct4_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
121
{
122
const int shift_1st = 1 + X265_DEPTH - 8;
123
const int shift_2nd = 8;
124
125
partialButterfly4(coef, dst, shift_2nd, 4);
126
}
127
128
-void dct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
129
+static void dct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
130
{
131
const int shift_1st = 2 + X265_DEPTH - 8;
132
const int shift_2nd = 9;
133
134
partialButterfly8(coef, dst, shift_2nd, 8);
135
}
136
137
-void dct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
138
+static void dct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
139
{
140
const int shift_1st = 3 + X265_DEPTH - 8;
141
const int shift_2nd = 10;
142
143
partialButterfly16(coef, dst, shift_2nd, 16);
144
}
145
146
-void dct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
147
+static void dct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
148
{
149
const int shift_1st = 4 + X265_DEPTH - 8;
150
const int shift_2nd = 11;
151
152
partialButterfly32(coef, dst, shift_2nd, 32);
153
}
154
155
-void idst4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
156
+static void idst4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
157
{
158
const int shift_1st = 7;
159
const int shift_2nd = 12 - (X265_DEPTH - 8);
160
161
}
162
}
163
164
-void idct4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
165
+static void idct4_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
166
{
167
const int shift_1st = 7;
168
const int shift_2nd = 12 - (X265_DEPTH - 8);
169
170
}
171
}
172
173
-void idct8_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
174
+static void idct8_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
175
{
176
const int shift_1st = 7;
177
const int shift_2nd = 12 - (X265_DEPTH - 8);
178
179
}
180
}
181
182
-void idct16_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
183
+static void idct16_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
184
{
185
const int shift_1st = 7;
186
const int shift_2nd = 12 - (X265_DEPTH - 8);
187
188
}
189
}
190
191
-void idct32_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
192
+static void idct32_c(const int16_t* src, int16_t* dst, intptr_t dstStride)
193
{
194
const int shift_1st = 7;
195
const int shift_2nd = 12 - (X265_DEPTH - 8);
196
197
}
198
}
199
200
-void dequant_normal_c(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift)
201
x265_1.7.tar.gz/source/common/deblock.cpp -> x265_1.8.tar.gz/source/common/deblock.cpp
Changed
10
1
2
#include "slice.h"
3
#include "mv.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#define DEBLOCK_SMALLEST_BLOCK 8
9
#define DEFAULT_INTRA_TC_OFFSET 2
10
x265_1.7.tar.gz/source/common/deblock.h -> x265_1.8.tar.gz/source/common/deblock.h
Changed
10
1
2
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class CUData;
10
x265_1.7.tar.gz/source/common/frame.cpp -> x265_1.8.tar.gz/source/common/frame.cpp
Changed
10
1
2
#include "picyuv.h"
3
#include "framedata.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
Frame::Frame()
9
{
10
x265_1.7.tar.gz/source/common/frame.h -> x265_1.8.tar.gz/source/common/frame.h
Changed
10
1
2
#include "lowres.h"
3
#include "threading.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class FrameData;
10
x265_1.7.tar.gz/source/common/framedata.cpp -> x265_1.8.tar.gz/source/common/framedata.cpp
Changed
10
1
2
#include "framedata.h"
3
#include "picyuv.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
FrameData::FrameData()
9
{
10
x265_1.7.tar.gz/source/common/framedata.h -> x265_1.8.tar.gz/source/common/framedata.h
Changed
72
1
2
#include "slice.h"
3
#include "cudata.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class PicYuv;
10
class JobProvider;
11
12
+#define INTER_MODES 4 // 2Nx2N, 2NxN, Nx2N, AMP modes
13
+#define INTRA_MODES 3 // DC, Planar, Angular modes
14
+
15
+/* Current frame stats for 2 pass */
16
+struct FrameStats
17
+{
18
+ int mvBits; /* MV bits (MV+Ref+Block Type) */
19
+ int coeffBits; /* Texture bits (DCT coefs) */
20
+ int miscBits;
21
+
22
+ int intra8x8Cnt;
23
+ int inter8x8Cnt;
24
+ int skip8x8Cnt;
25
+
26
+ /* CU type counts stored as percentage */
27
+ double percent8x8Intra;
28
+ double percent8x8Inter;
29
+ double percent8x8Skip;
30
+ double avgLumaDistortion;
31
+ double avgChromaDistortion;
32
+ double avgPsyEnergy;
33
+ double avgLumaLevel;
34
+ double lumaLevel;
35
+ double percentIntraNxN;
36
+ double percentSkipCu[NUM_CU_DEPTH];
37
+ double percentMergeCu[NUM_CU_DEPTH];
38
+ double percentIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
39
+ double percentInterDistribution[NUM_CU_DEPTH][3]; // 2Nx2N, RECT, AMP modes percentage
40
+
41
+ uint64_t cntIntraNxN;
42
+ uint64_t totalCu;
43
+ uint64_t totalCtu;
44
+ uint64_t lumaDistortion;
45
+ uint64_t chromaDistortion;
46
+ uint64_t psyEnergy;
47
+ uint64_t cntSkipCu[NUM_CU_DEPTH];
48
+ uint64_t cntMergeCu[NUM_CU_DEPTH];
49
+ uint64_t cntInter[NUM_CU_DEPTH];
50
+ uint64_t cntIntra[NUM_CU_DEPTH];
51
+ uint64_t cuInterDistribution[NUM_CU_DEPTH][INTER_MODES];
52
+ uint64_t cuIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
53
+ uint16_t maxLumaLevel;
54
+
55
+ FrameStats()
56
+ {
57
+ memset(this, 0, sizeof(FrameStats));
58
+ }
59
+};
60
+
61
/* Per-frame data that is used during encodes and referenced while the picture
62
* is available for reference. A FrameData instance is attached to a Frame as it
63
* comes out of the lookahead. Frames which are not being encoded do not have a
64
65
66
RCStatCU* m_cuStat;
67
RCStatRow* m_rowStat;
68
+ FrameStats m_frameStats; // stats of current frame for multi-pass encodes
69
70
double m_avgQpRc; /* avg QP as decided by rate-control */
71
double m_avgQpAq; /* avg QP as decided by AQ in addition to rate-control */
72
x265_1.7.tar.gz/source/common/intrapred.cpp -> x265_1.8.tar.gz/source/common/intrapred.cpp
Changed
28
1
2
#include "common.h"
3
#include "primitives.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
namespace {
9
10
11
filtered[tuSize2 + tuSize2] = leftLast;
12
}
13
14
-void dcPredFilter(const pixel* above, const pixel* left, pixel* dst, intptr_t dststride, int size)
15
+static void dcPredFilter(const pixel* above, const pixel* left, pixel* dst, intptr_t dststride, int size)
16
{
17
// boundary pixels processing
18
dst[0] = (pixel)((above[0] + left[0] + 2 * dst[0] + 2) >> 2);
19
20
}
21
}
22
23
-namespace x265 {
24
+namespace X265_NS {
25
// x265 private namespace
26
27
void setupIntraPrimitives_c(EncoderPrimitives& p)
28
x265_1.7.tar.gz/source/common/ipfilter.cpp -> x265_1.8.tar.gz/source/common/ipfilter.cpp
Changed
36
1
2
#include "primitives.h"
3
#include "x265.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#if _MSC_VER
9
#pragma warning(disable: 4127) // conditional expression is constant, typical for templated functions
10
#endif
11
12
namespace {
13
+// file local namespace
14
+
15
template<int width, int height>
16
void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride)
17
{
18
19
}
20
}
21
22
-void extendCURowColBorder(pixel* txt, intptr_t stride, int width, int height, int marginX)
23
+static void extendCURowColBorder(pixel* txt, intptr_t stride, int width, int height, int marginX)
24
{
25
for (int y = 0; y < height; y++)
26
{
27
28
}
29
}
30
31
-namespace x265 {
32
+namespace X265_NS {
33
// x265 private namespace
34
35
#define CHROMA_420(W, H) \
36
x265_1.7.tar.gz/source/common/loopfilter.cpp -> x265_1.8.tar.gz/source/common/loopfilter.cpp
Changed
71
1
2
return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));
3
}
4
5
-void calSign(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
6
+static void calSign(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
7
{
8
for (int x = 0; x < endX; x++)
9
dst[x] = signOf(src1[x] - src2[x]);
10
}
11
12
-void processSaoCUE0(pixel * rec, int8_t * offsetEo, int width, int8_t* signLeft, intptr_t stride)
13
+static void processSaoCUE0(pixel * rec, int8_t * offsetEo, int width, int8_t* signLeft, intptr_t stride)
14
{
15
int x, y;
16
int8_t signRight, signLeft0;
17
18
}
19
}
20
21
-void processSaoCUE1(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
22
+static void processSaoCUE1(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
23
{
24
int x;
25
int8_t signDown;
26
27
}
28
}
29
30
-void processSaoCUE1_2Rows(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
31
+static void processSaoCUE1_2Rows(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
32
{
33
int x, y;
34
int8_t signDown;
35
36
}
37
}
38
39
-void processSaoCUE2(pixel * rec, int8_t * bufft, int8_t * buff1, int8_t * offsetEo, int width, intptr_t stride)
40
+static void processSaoCUE2(pixel * rec, int8_t * bufft, int8_t * buff1, int8_t * offsetEo, int width, intptr_t stride)
41
{
42
int x;
43
for (x = 0; x < width; x++)
44
45
}
46
}
47
48
-void processSaoCUE3(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX)
49
+static void processSaoCUE3(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX)
50
{
51
int8_t signDown;
52
int8_t edgeType;
53
54
}
55
}
56
57
-void processSaoCUB0(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
58
+static void processSaoCUB0(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
59
{
60
#define SAO_BO_BITS 5
61
const int boShift = X265_DEPTH - SAO_BO_BITS;
62
63
}
64
}
65
66
-namespace x265 {
67
+namespace X265_NS {
68
void setupLoopFilterPrimitives_c(EncoderPrimitives &p)
69
{
70
p.saoCuOrgE0 = processSaoCUE0;
71
x265_1.7.tar.gz/source/common/lowres.cpp -> x265_1.8.tar.gz/source/common/lowres.cpp
Changed
47
1
2
#include "lowres.h"
3
#include "mv.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled)
9
{
10
11
lumaStride = width + 2 * origPic->m_lumaMarginX;
12
if (lumaStride & 31)
13
lumaStride += 32 - (lumaStride & 31);
14
- int cuWidth = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
15
- int cuHeight = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
16
- int cuCount = cuWidth * cuHeight;
17
+ maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
18
+ maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
19
+ int cuCount = maxBlocksInRow * maxBlocksInCol;
20
21
/* rounding the width to multiple of lowres CU size */
22
- width = cuWidth * X265_LOWRES_CU_SIZE;
23
- lines = cuHeight * X265_LOWRES_CU_SIZE;
24
+ width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
25
+ lines = maxBlocksInCol * X265_LOWRES_CU_SIZE;
26
27
size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
28
size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX;
29
30
{
31
for (int j = 0; j < bframes + 2; j++)
32
{
33
- CHECKED_MALLOC(rowSatds[i][j], int32_t, cuHeight);
34
+ CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInCol);
35
CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCount);
36
}
37
}
38
39
void Lowres::init(PicYuv *origPic, int poc)
40
{
41
bLastMiniGopBFrame = false;
42
- bScenecut = true; // could be a scene-cut, until ruled out by flash detection
43
+ bScenecut = false; // could be a scene-cut, until ruled out by flash detection
44
bKeyframe = false; // Not a keyframe unless identified by lookahead
45
frameNum = poc;
46
leadingBframes = 0;
47
x265_1.7.tar.gz/source/common/lowres.h -> x265_1.8.tar.gz/source/common/lowres.h
Changed
19
1
2
#include "picyuv.h"
3
#include "mv.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
struct ReferencePlanes
10
11
uint16_t(*lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2]);
12
int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 1];
13
MV* lowresMvs[2][X265_BFRAME_MAX + 1];
14
+ uint32_t maxBlocksInRow;
15
+ uint32_t maxBlocksInCol;
16
17
/* used for vbvLookahead */
18
int plannedType[X265_LOOKAHEAD_MAX + 1];
19
x265_1.7.tar.gz/source/common/md5.cpp -> x265_1.8.tar.gz/source/common/md5.cpp
Changed
10
1
2
#include "common.h"
3
#include "md5.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
#ifndef ARCH_BIG_ENDIAN
10
x265_1.7.tar.gz/source/common/md5.h -> x265_1.8.tar.gz/source/common/md5.h
Changed
10
1
2
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
//private x265 namespace
8
9
typedef struct MD5Context
10
x265_1.7.tar.gz/source/common/mv.h -> x265_1.8.tar.gz/source/common/mv.h
Changed
10
1
2
#include "common.h"
3
#include "primitives.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
#if _MSC_VER
10
x265_1.7.tar.gz/source/common/param.cpp -> x265_1.8.tar.gz/source/common/param.cpp
Changed
201
1
2
*/
3
4
#undef strtok_r
5
-char* strtok_r(char* str, const char* delim, char** nextp)
6
+static char* strtok_r(char* str, const char* delim, char** nextp)
7
{
8
if (!str)
9
str = *nextp;
10
11
12
#endif // if !defined(HAVE_STRTOK_R)
13
14
-using namespace x265;
15
+#if EXPORT_C_API
16
+
17
+/* these functions are exported as C functions (default) */
18
+using namespace X265_NS;
19
+extern "C" {
20
+
21
+#else
22
+
23
+/* these functions exist within private namespace (multilib) */
24
+namespace X265_NS {
25
+
26
+#endif
27
28
-extern "C"
29
x265_param *x265_param_alloc()
30
{
31
return (x265_param*)x265_malloc(sizeof(x265_param));
32
}
33
34
-extern "C"
35
void x265_param_free(x265_param* p)
36
{
37
x265_free(p);
38
}
39
40
-extern "C"
41
void x265_param_default(x265_param* param)
42
{
43
memset(param, 0, sizeof(x265_param));
44
45
/* Applying default values to all elements in the param structure */
46
- param->cpuid = x265::cpu_detect();
47
+ param->cpuid = X265_NS::cpu_detect();
48
param->bEnableWavefront = 1;
49
param->frameNumThreads = 0;
50
51
52
param->bEnableSsim = 0;
53
54
/* Source specifications */
55
- param->internalBitDepth = x265_max_bit_depth;
56
+ param->internalBitDepth = X265_DEPTH;
57
param->internalCsp = X265_CSP_I420;
58
59
param->levelIdc = 0;
60
61
param->subpelRefine = 2;
62
param->searchRange = 57;
63
param->maxNumMergeCand = 2;
64
+ param->limitReferences = 0;
65
param->bEnableWeightedPred = 1;
66
param->bEnableWeightedBiPred = 0;
67
param->bEnableEarlySkip = 0;
68
69
param->rc.rateControlMode = X265_RC_CRF;
70
param->rc.qp = 32;
71
param->rc.aqMode = X265_AQ_VARIANCE;
72
+ param->rc.qgSize = 32;
73
param->rc.aqStrength = 1.0;
74
param->rc.cuTree = 1;
75
param->rc.rfConstantMax = 0;
76
77
param->rc.zones = NULL;
78
param->rc.bEnableSlowFirstPass = 0;
79
param->rc.bStrictCbr = 0;
80
- param->rc.qgSize = 64; /* Same as maxCUSize */
81
82
/* Video Usability Information (VUI) */
83
param->vui.aspectRatioIdc = 0;
84
85
param->vui.defDispWinBottomOffset = 0;
86
}
87
88
-extern "C"
89
int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
90
{
91
- x265_param_default(param);
92
+#if EXPORT_C_API
93
+ ::x265_param_default(param);
94
+#else
95
+ X265_NS::x265_param_default(param);
96
+#endif
97
98
if (preset)
99
{
100
101
param->deblockingFilterBetaOffset = -2;
102
param->deblockingFilterTCOffset = -2;
103
param->bIntraInBFrames = 0;
104
- param->rdoqLevel = 1;
105
- param->psyRdoq = 30;
106
+ param->rdoqLevel = 2;
107
+ param->psyRdoq = 10.0;
108
param->psyRd = 0.5;
109
param->rc.ipFactor = 1.1;
110
param->rc.pbFactor = 1.1;
111
112
return 0;
113
}
114
115
-static double x265_atof(const char* str, bool& bError)
116
-{
117
- char *end;
118
- double v = strtod(str, &end);
119
-
120
- if (end == str || *end != '\0')
121
- bError = true;
122
- return v;
123
-}
124
-
125
static int parseName(const char* arg, const char* const* names, bool& bError)
126
{
127
for (int i = 0; names[i]; i++)
128
129
#define atof(str) x265_atof(str, bError)
130
#define atobool(str) (bNameWasBool = true, x265_atobool(str, bError))
131
132
-extern "C"
133
int x265_param_parse(x265_param* p, const char* name, const char* value)
134
{
135
bool bError = false;
136
137
}
138
}
139
OPT("cu-stats") p->bLogCuStats = atobool(value);
140
+ OPT("total-frames") p->totalFrames = atoi(value);
141
OPT("annexb") p->bAnnexB = atobool(value);
142
OPT("repeat-headers") p->bRepeatHeaders = atobool(value);
143
OPT("wpp") p->bEnableWavefront = atobool(value);
144
145
}
146
}
147
OPT("ref") p->maxNumReferences = atoi(value);
148
+ OPT("limit-refs") p->limitReferences = atoi(value);
149
OPT("weightp") p->bEnableWeightedPred = atobool(value);
150
OPT("weightb") p->bEnableWeightedBiPred = atobool(value);
151
OPT("cbqpoffs") p->cbQpOffset = atoi(value);
152
153
p->vui.chromaSampleLocTypeTopField = atoi(value);
154
p->vui.chromaSampleLocTypeBottomField = p->vui.chromaSampleLocTypeTopField;
155
}
156
- OPT("crop-rect")
157
+ OPT2("display-window", "crop-rect")
158
{
159
p->vui.bEnableDefaultDisplayWindowFlag = 1;
160
bError |= sscanf(value, "%d,%d,%d,%d",
161
162
p->rc.bStatRead = pass & 2;
163
}
164
OPT("stats") p->rc.statFileName = strdup(value);
165
- OPT("csv") p->csvfn = strdup(value);
166
OPT("scaling-list") p->scalingLists = strdup(value);
167
OPT2("pools", "numa-pools") p->numaPools = strdup(value);
168
OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
169
170
return bError ? X265_PARAM_BAD_VALUE : 0;
171
}
172
173
-namespace x265 {
174
+} /* end extern "C" or namespace */
175
+
176
+namespace X265_NS {
177
// internal encoder functions
178
179
int x265_atoi(const char* str, bool& bError)
180
181
return v;
182
}
183
184
+double x265_atof(const char* str, bool& bError)
185
+{
186
+ char *end;
187
+ double v = strtod(str, &end);
188
+
189
+ if (end == str || *end != '\0')
190
+ bError = true;
191
+ return v;
192
+}
193
+
194
/* cpu name can be:
195
* auto || true - x265::cpu_detect()
196
* false || no - disabled
197
198
if (isdigit(value[0]))
199
cpu = x265_atoi(value, bError);
200
else
201
x265_1.7.tar.gz/source/common/param.h -> x265_1.8.tar.gz/source/common/param.h
Changed
48
1
2
* Copyright (C) 2013 x265 project
3
*
4
* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5
+ * Praveen Kumar Tiwari <praveen@multicorewareinc.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
10
#ifndef X265_PARAM_H
11
#define X265_PARAM_H
12
13
-namespace x265 {
14
+namespace X265_NS {
15
+
16
int x265_check_params(x265_param *param);
17
int x265_set_globals(x265_param *param);
18
void x265_print_params(x265_param *param);
19
20
void x265_param_apply_fastfirstpass(x265_param *p);
21
char* x265_param2string(x265_param *param);
22
int x265_atoi(const char *str, bool& bError);
23
+double x265_atof(const char *str, bool& bError);
24
int parseCpuName(const char *value, bool& bError);
25
void setParamAspectRatio(x265_param *p, int width, int height);
26
void getParamAspectRatio(x265_param *p, int& width, int& height);
27
bool parseLambdaFile(x265_param *param);
28
29
/* this table is kept internal to avoid confusion, since log level indices start at -1 */
30
-static const char * const logLevelNames[] = { "none", "error", "warning", "info", "frame", "debug", "full", 0 };
31
+static const char * const logLevelNames[] = { "none", "error", "warning", "info", "debug", "full", 0 };
32
+
33
+#if EXPORT_C_API
34
+#define PARAM_NS
35
+#else
36
+/* declare param functions within private namespace */
37
+void x265_param_free(x265_param *);
38
+x265_param* x265_param_alloc();
39
+void x265_param_default(x265_param *param);
40
+int x265_param_default_preset(x265_param *, const char *preset, const char *tune);
41
+int x265_param_apply_profile(x265_param *, const char *profile);
42
+int x265_param_parse(x265_param *p, const char *name, const char *value);
43
+#define PARAM_NS X265_NS
44
+#endif
45
46
#define MAXPARAMSIZE 2000
47
}
48
x265_1.7.tar.gz/source/common/piclist.cpp -> x265_1.8.tar.gz/source/common/piclist.cpp
Changed
10
1
2
#include "piclist.h"
3
#include "frame.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
void PicList::pushFront(Frame& curFrame)
9
{
10
x265_1.7.tar.gz/source/common/piclist.h -> x265_1.8.tar.gz/source/common/piclist.h
Changed
14
1
2
#ifndef X265_PICLIST_H
3
#define X265_PICLIST_H
4
5
-#include <cstdlib>
6
+#include "common.h"
7
+
8
+namespace X265_NS {
9
10
-namespace x265 {
11
class Frame;
12
13
class PicList
14
x265_1.7.tar.gz/source/common/picyuv.cpp -> x265_1.8.tar.gz/source/common/picyuv.cpp
Changed
147
1
2
#include "slice.h"
3
#include "primitives.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
PicYuv::PicYuv()
9
{
10
11
padx++;
12
pady++;
13
14
- if (pic.bitDepth < X265_DEPTH)
15
- {
16
- pixel *yPixel = m_picOrg[0];
17
- pixel *uPixel = m_picOrg[1];
18
- pixel *vPixel = m_picOrg[2];
19
+ X265_CHECK(pic.bitDepth >= 8, "pic.bitDepth check failure");
20
21
- uint8_t *yChar = (uint8_t*)pic.planes[0];
22
- uint8_t *uChar = (uint8_t*)pic.planes[1];
23
- uint8_t *vChar = (uint8_t*)pic.planes[2];
24
- int shift = X265_MAX(0, X265_DEPTH - pic.bitDepth);
25
-
26
- primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift);
27
- primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
28
- primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
29
- }
30
- else if (pic.bitDepth == 8)
31
+ if (pic.bitDepth == 8)
32
{
33
- pixel *yPixel = m_picOrg[0];
34
- pixel *uPixel = m_picOrg[1];
35
- pixel *vPixel = m_picOrg[2];
36
+#if (X265_DEPTH > 8)
37
+ {
38
+ pixel *yPixel = m_picOrg[0];
39
+ pixel *uPixel = m_picOrg[1];
40
+ pixel *vPixel = m_picOrg[2];
41
+
42
+ uint8_t *yChar = (uint8_t*)pic.planes[0];
43
+ uint8_t *uChar = (uint8_t*)pic.planes[1];
44
+ uint8_t *vChar = (uint8_t*)pic.planes[2];
45
+ int shift = (X265_DEPTH - 8);
46
+
47
+ primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift);
48
+ primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
49
+ primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
50
+ }
51
+#else /* Case for (X265_DEPTH == 8) */
52
+ // TODO: Does we need this path? may merge into above in future
53
+ {
54
+ pixel *yPixel = m_picOrg[0];
55
+ pixel *uPixel = m_picOrg[1];
56
+ pixel *vPixel = m_picOrg[2];
57
58
- uint8_t *yChar = (uint8_t*)pic.planes[0];
59
- uint8_t *uChar = (uint8_t*)pic.planes[1];
60
- uint8_t *vChar = (uint8_t*)pic.planes[2];
61
+ uint8_t *yChar = (uint8_t*)pic.planes[0];
62
+ uint8_t *uChar = (uint8_t*)pic.planes[1];
63
+ uint8_t *vChar = (uint8_t*)pic.planes[2];
64
65
- for (int r = 0; r < height; r++)
66
- {
67
- memcpy(yPixel, yChar, width * sizeof(pixel));
68
+ for (int r = 0; r < height; r++)
69
+ {
70
+ memcpy(yPixel, yChar, width * sizeof(pixel));
71
72
- yPixel += m_stride;
73
- yChar += pic.stride[0] / sizeof(*yChar);
74
- }
75
+ yPixel += m_stride;
76
+ yChar += pic.stride[0] / sizeof(*yChar);
77
+ }
78
79
- for (int r = 0; r < height >> m_vChromaShift; r++)
80
- {
81
- memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
82
- memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
83
+ for (int r = 0; r < height >> m_vChromaShift; r++)
84
+ {
85
+ memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
86
+ memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
87
88
- uPixel += m_strideC;
89
- vPixel += m_strideC;
90
- uChar += pic.stride[1] / sizeof(*uChar);
91
- vChar += pic.stride[2] / sizeof(*vChar);
92
+ uPixel += m_strideC;
93
+ vPixel += m_strideC;
94
+ uChar += pic.stride[1] / sizeof(*uChar);
95
+ vChar += pic.stride[2] / sizeof(*vChar);
96
+ }
97
}
98
+#endif /* (X265_DEPTH > 8) */
99
}
100
else /* pic.bitDepth > 8 */
101
{
102
+ /* defensive programming, mask off bits that are supposed to be zero */
103
+ uint16_t mask = (1 << X265_DEPTH) - 1;
104
+ int shift = abs(pic.bitDepth - X265_DEPTH);
105
pixel *yPixel = m_picOrg[0];
106
pixel *uPixel = m_picOrg[1];
107
pixel *vPixel = m_picOrg[2];
108
109
uint16_t *uShort = (uint16_t*)pic.planes[1];
110
uint16_t *vShort = (uint16_t*)pic.planes[2];
111
112
- /* defensive programming, mask off bits that are supposed to be zero */
113
- uint16_t mask = (1 << X265_DEPTH) - 1;
114
- int shift = X265_MAX(0, pic.bitDepth - X265_DEPTH);
115
-
116
- /* shift and mask pixels to final size */
117
-
118
- primitives.planecopy_sp(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
119
- primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
120
- primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
121
+ if (pic.bitDepth > X265_DEPTH)
122
+ {
123
+ /* shift right and mask pixels to final size */
124
+ primitives.planecopy_sp(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
125
+ primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
126
+ primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
127
+ }
128
+ else /* Case for (pic.bitDepth <= X265_DEPTH) */
129
+ {
130
+ /* shift left and mask pixels to final size */
131
+ primitives.planecopy_sp_shl(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
132
+ primitives.planecopy_sp_shl(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
133
+ primitives.planecopy_sp_shl(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
134
+ }
135
}
136
137
/* extend the right edge if width was not multiple of the minimum CU size */
138
139
}
140
}
141
142
-namespace x265 {
143
+namespace X265_NS {
144
145
template<uint32_t OUTPUT_BITDEPTH_DIV8>
146
static void md5_block(MD5Context& md5, const pixel* plane, uint32_t n)
147
x265_1.7.tar.gz/source/common/picyuv.h -> x265_1.8.tar.gz/source/common/picyuv.h
Changed
10
1
2
#include "md5.h"
3
#include "x265.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class ShortYuv;
10
x265_1.7.tar.gz/source/common/pixel.cpp -> x265_1.8.tar.gz/source/common/pixel.cpp
Changed
201
1
2
3
#include <cstdlib> // abs()
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
namespace {
9
// place functions in anonymous namespace (file static)
10
11
}
12
13
template<int lx, int ly, class T1, class T2>
14
-int sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
15
+sse_ret_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
16
{
17
- int sum = 0;
18
+ sse_ret_t sum = 0;
19
int tmp;
20
21
for (int y = 0; y < ly; y++)
22
23
return (a + s) ^ s;
24
}
25
26
-int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
27
+static int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
28
{
29
sum2_t tmp[4][2];
30
sum2_t a0, a1, a2, a3, b0, b1;
31
32
}
33
34
// x264's SWAR version of satd 8x4, performs two 4x4 SATDs at once
35
-int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
36
+static int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
37
{
38
sum2_t tmp[4][4];
39
sum2_t a0, a1, a2, a3;
40
41
return (int)sum;
42
}
43
44
-int sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
45
+inline int sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
46
{
47
return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
48
}
49
50
return (int)sum;
51
}
52
53
-int sa8d_8x8(const int16_t* pix1, intptr_t i_pix1)
54
+static int sa8d_8x8(const int16_t* pix1, intptr_t i_pix1)
55
{
56
return (int)((_sa8d_8x8(pix1, i_pix1) + 2) >> 2);
57
}
58
59
-int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
60
+static int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
61
{
62
int sum = _sa8d_8x8(pix1, i_pix1, pix2, i_pix2)
63
+ _sa8d_8x8(pix1 + 8, i_pix1, pix2 + 8, i_pix2)
64
65
dst[k * blockSize + l] = src[l * stride + k];
66
}
67
68
-void weight_sp_c(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
69
+static void weight_sp_c(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
70
{
71
int x, y;
72
73
74
}
75
}
76
77
-void weight_pp_c(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
78
+static void weight_pp_c(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
79
{
80
int x, y;
81
82
83
}
84
}
85
86
-void scale1D_128to64(pixel *dst, const pixel *src)
87
+static void scale1D_128to64(pixel *dst, const pixel *src)
88
{
89
int x;
90
const pixel* src1 = src;
91
92
}
93
}
94
95
-void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
96
+static void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
97
{
98
uint32_t x, y;
99
100
101
}
102
}
103
104
+static
105
void frame_init_lowres_core(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc,
106
intptr_t src_stride, intptr_t dst_stride, int width, int height)
107
{
108
109
}
110
111
/* structural similarity metric */
112
-void ssim_4x4x2_core(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4])
113
+static void ssim_4x4x2_core(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4])
114
{
115
for (int z = 0; z < 2; z++)
116
{
117
118
}
119
}
120
121
-float ssim_end_1(int s1, int s2, int ss, int s12)
122
+static float ssim_end_1(int s1, int s2, int ss, int s12)
123
{
124
/* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases.
125
* s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
126
127
128
#define PIXEL_MAX ((1 << X265_DEPTH) - 1)
129
#if HIGH_BIT_DEPTH
130
- X265_CHECK(X265_DEPTH == 10, "ssim invalid depth\n");
131
+ X265_CHECK((X265_DEPTH == 10) || (X265_DEPTH == 12), "ssim invalid depth\n");
132
#define type float
133
static const float ssim_c1 = (float)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64);
134
static const float ssim_c2 = (float)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63);
135
136
#undef PIXEL_MAX
137
}
138
139
-float ssim_end_4(int sum0[5][4], int sum1[5][4], int width)
140
+static float ssim_end_4(int sum0[5][4], int sum1[5][4], int width)
141
{
142
float ssim = 0.0;
143
144
145
}
146
}
147
148
-void planecopy_cp_c(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
149
+static void planecopy_cp_c(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
150
{
151
for (int r = 0; r < height; r++)
152
{
153
154
}
155
}
156
157
-void planecopy_sp_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
158
+static void planecopy_sp_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
159
{
160
for (int r = 0; r < height; r++)
161
{
162
163
}
164
}
165
166
+static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
167
+{
168
+ for (int r = 0; r < height; r++)
169
+ {
170
+ for (int c = 0; c < width; c++)
171
+ dst[c] = (pixel)((src[c] << shift) & mask);
172
+
173
+ dst += dstStride;
174
+ src += srcStride;
175
+ }
176
+}
177
+
178
/* Estimate the total amount of influence on future quality that could be had if we
179
* were to improve the reference samples used to inter predict any given CU. */
180
-void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
181
+static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
182
const int32_t* invQscales, const double* fpsFactor, int len)
183
{
184
double fps = *fpsFactor / 256;
185
186
}
187
} // end anonymous namespace
188
189
-namespace x265 {
190
+namespace X265_NS {
191
// x265 private namespace
192
193
/* Extend the edges of a picture so that it may safely be used for motion
194
195
196
p.planecopy_cp = planecopy_cp_c;
197
p.planecopy_sp = planecopy_sp_c;
198
+ p.planecopy_sp_shl = planecopy_sp_shl_c;
199
p.propagateCost = estimateCUPropagateCost;
200
}
201
x265_1.7.tar.gz/source/common/predict.cpp -> x265_1.8.tar.gz/source/common/predict.cpp
Changed
46
1
2
#include "predict.h"
3
#include "primitives.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#if _MSC_VER
9
#pragma warning(disable: 4127) // conditional expression is constant
10
11
// Fill left & below-left samples
12
adiTemp += picStride;
13
adi--;
14
- pNeighborFlags--;
15
- for (int j = 0; j < leftUnits; j++)
16
+ // NOTE: over copy here, but reduce condition operators
17
+ for (int j = 0; j < leftUnits * unitHeight; j++)
18
{
19
- if (*pNeighborFlags)
20
- for (int i = 0; i < unitHeight; i++)
21
- adi[-i] = adiTemp[i * picStride];
22
-
23
- adiTemp += unitHeight * picStride;
24
- adi -= unitHeight;
25
- pNeighborFlags--;
26
+ adi[-j] = adiTemp[j * picStride];
27
}
28
29
// Fill above & above-right samples
30
adiTemp = adiOrigin - picStride;
31
adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
32
- pNeighborFlags = bNeighborFlags + leftUnits + 1;
33
- for (int j = 0; j < aboveUnits; j++)
34
- {
35
- if (*pNeighborFlags)
36
- memcpy(adi, adiTemp, unitWidth * sizeof(*adiTemp));
37
- adiTemp += unitWidth;
38
- adi += unitWidth;
39
- pNeighborFlags++;
40
- }
41
+ // NOTE: over copy here, but reduce condition operators
42
+ memcpy(adi, adiTemp, aboveUnits * unitWidth * sizeof(*adiTemp));
43
44
// Pad reference samples when necessary
45
int curr = 0;
46
x265_1.7.tar.gz/source/common/predict.h -> x265_1.8.tar.gz/source/common/predict.h
Changed
10
1
2
#include "shortyuv.h"
3
#include "yuv.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
8
class CUData;
9
class Slice;
10
x265_1.7.tar.gz/source/common/primitives.cpp -> x265_1.8.tar.gz/source/common/primitives.cpp
Changed
155
1
2
#include "common.h"
3
#include "primitives.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
extern const uint8_t lumaPartitionMapTable[] =
10
11
void setupFilterPrimitives_c(EncoderPrimitives &p);
12
void setupIntraPrimitives_c(EncoderPrimitives &p);
13
void setupLoopFilterPrimitives_c(EncoderPrimitives &p);
14
+void setupSaoPrimitives_c(EncoderPrimitives &p);
15
16
void setupCPrimitives(EncoderPrimitives &p)
17
{
18
19
setupFilterPrimitives_c(p); // ipfilter.cpp
20
setupIntraPrimitives_c(p); // intrapred.cpp
21
setupLoopFilterPrimitives_c(p); // loopfilter.cpp
22
+ setupSaoPrimitives_c(p); // sao.cpp
23
}
24
25
void setupAliasPrimitives(EncoderPrimitives &p)
26
27
/* at HIGH_BIT_DEPTH, pixel == short so we can alias many primitives */
28
for (int i = 0; i < NUM_CU_SIZES; i++)
29
{
30
- p.cu[i].sse_pp = (pixelcmp_t)p.cu[i].sse_ss;
31
+ p.cu[i].sse_pp = (pixel_sse_t)p.cu[i].sse_ss;
32
33
p.cu[i].copy_ps = (copy_ps_t)p.pu[i].copy_pp;
34
p.cu[i].copy_sp = (copy_sp_t)p.pu[i].copy_pp;
35
36
37
p.chroma[X265_CSP_I422].cu[BLOCK_422_2x4].sse_pp = NULL;
38
}
39
-}
40
-using namespace x265;
41
42
-/* cpuid >= 0 - force CPU type
43
- * cpuid < 0 - auto-detect if uninitialized */
44
-void x265_setup_primitives(x265_param *param, int cpuid)
45
+void x265_report_simd(x265_param* param)
46
{
47
- if (cpuid < 0)
48
- cpuid = x265::cpu_detect();
49
-
50
- // initialize global variables
51
- if (!primitives.pu[0].sad)
52
- {
53
- setupCPrimitives(primitives);
54
-
55
- /* We do not want the encoder to use the un-optimized intra all-angles
56
- * C references. It is better to call the individual angle functions
57
- * instead. We must check for NULL before using this primitive */
58
- for (int i = 0; i < NUM_TR_SIZE; i++)
59
- primitives.cu[i].intra_pred_allangs = NULL;
60
-
61
-#if ENABLE_ASSEMBLY
62
- setupInstrinsicPrimitives(primitives, cpuid);
63
- setupAssemblyPrimitives(primitives, cpuid);
64
-#else
65
- x265_log(param, X265_LOG_WARNING, "Assembly not supported in this binary\n");
66
-#endif
67
-
68
- setupAliasPrimitives(primitives);
69
- }
70
-
71
if (param->logLevel >= X265_LOG_INFO)
72
{
73
+ int cpuid = param->cpuid;
74
+
75
char buf[1000];
76
char *p = buf + sprintf(buf, "using cpu capabilities:");
77
char *none = p;
78
- for (int i = 0; x265::cpu_names[i].flags; i++)
79
+ for (int i = 0; X265_NS::cpu_names[i].flags; i++)
80
{
81
- if (!strcmp(x265::cpu_names[i].name, "SSE")
82
+ if (!strcmp(X265_NS::cpu_names[i].name, "SSE")
83
&& (cpuid & X265_CPU_SSE2))
84
continue;
85
- if (!strcmp(x265::cpu_names[i].name, "SSE2")
86
+ if (!strcmp(X265_NS::cpu_names[i].name, "SSE2")
87
&& (cpuid & (X265_CPU_SSE2_IS_FAST | X265_CPU_SSE2_IS_SLOW)))
88
continue;
89
- if (!strcmp(x265::cpu_names[i].name, "SSE3")
90
+ if (!strcmp(X265_NS::cpu_names[i].name, "SSE3")
91
&& (cpuid & X265_CPU_SSSE3 || !(cpuid & X265_CPU_CACHELINE_64)))
92
continue;
93
- if (!strcmp(x265::cpu_names[i].name, "SSE4.1")
94
+ if (!strcmp(X265_NS::cpu_names[i].name, "SSE4.1")
95
&& (cpuid & X265_CPU_SSE42))
96
continue;
97
- if (!strcmp(x265::cpu_names[i].name, "BMI1")
98
+ if (!strcmp(X265_NS::cpu_names[i].name, "BMI1")
99
&& (cpuid & X265_CPU_BMI2))
100
continue;
101
- if ((cpuid & x265::cpu_names[i].flags) == x265::cpu_names[i].flags
102
- && (!i || x265::cpu_names[i].flags != x265::cpu_names[i - 1].flags))
103
- p += sprintf(p, " %s", x265::cpu_names[i].name);
104
+ if ((cpuid & X265_NS::cpu_names[i].flags) == X265_NS::cpu_names[i].flags
105
+ && (!i || X265_NS::cpu_names[i].flags != X265_NS::cpu_names[i - 1].flags))
106
+ p += sprintf(p, " %s", X265_NS::cpu_names[i].name);
107
}
108
109
if (p == none)
110
111
}
112
}
113
114
+void x265_setup_primitives(x265_param *param)
115
+{
116
+ if (!primitives.pu[0].sad)
117
+ {
118
+ setupCPrimitives(primitives);
119
+
120
+ /* We do not want the encoder to use the un-optimized intra all-angles
121
+ * C references. It is better to call the individual angle functions
122
+ * instead. We must check for NULL before using this primitive */
123
+ for (int i = 0; i < NUM_TR_SIZE; i++)
124
+ primitives.cu[i].intra_pred_allangs = NULL;
125
+
126
+#if ENABLE_ASSEMBLY
127
+ setupInstrinsicPrimitives(primitives, param->cpuid);
128
+ setupAssemblyPrimitives(primitives, param->cpuid);
129
+#endif
130
+
131
+ setupAliasPrimitives(primitives);
132
+ }
133
+
134
+ x265_report_simd(param);
135
+}
136
+}
137
+
138
#if ENABLE_ASSEMBLY
139
/* these functions are implemented in assembly. When assembly is not being
140
* compiled, they are unnecessary and can be NOPs */
141
#else
142
extern "C" {
143
-int x265_cpu_cpuid_test(void) { return 0; }
144
-void x265_cpu_emms(void) {}
145
-void x265_cpu_cpuid(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, uint32_t *) { *eax = 0; }
146
-void x265_cpu_xgetbv(uint32_t, uint32_t *, uint32_t *) {}
147
+int PFX(cpu_cpuid_test)(void) { return 0; }
148
+void PFX(cpu_emms)(void) {}
149
+void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, uint32_t *) { *eax = 0; }
150
+void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {}
151
+void PFX(cpu_neon_test)(void) {}
152
+int PFX(cpu_fast_neon_mrc_test)(void) { return 0; }
153
}
154
#endif
155
x265_1.7.tar.gz/source/common/primitives.h -> x265_1.8.tar.gz/source/common/primitives.h
Changed
108
1
2
#include "common.h"
3
#include "cpu.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
enum LumaPU
10
11
12
typedef int (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
13
typedef int (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
14
+typedef sse_ret_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
15
+typedef sse_ret_t (*pixel_sse_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
16
typedef int (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
17
typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
18
typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
19
20
typedef void (*saoCuOrgE2_t)(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
21
typedef void (*saoCuOrgE3_t)(pixel* rec, int8_t* upBuff1, int8_t* m_offsetEo, intptr_t stride, int startX, int endX);
22
typedef void (*saoCuOrgB0_t)(pixel* rec, const int8_t* offsetBo, int ctuWidth, int ctuHeight, intptr_t stride);
23
+
24
+typedef void (*saoCuStatsBO_t)(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count);
25
+typedef void (*saoCuStatsE0_t)(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count);
26
+typedef void (*saoCuStatsE1_t)(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count);
27
+typedef void (*saoCuStatsE2_t)(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int8_t *upBuff, int endX, int endY, int32_t *stats, int32_t *count);
28
+typedef void (*saoCuStatsE3_t)(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count);
29
+
30
typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
31
typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
32
typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
33
34
typedef int (*scanPosLast_t)(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
35
typedef uint32_t (*findPosFirstLast_t)(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16]);
36
37
+typedef uint32_t (*costCoeffNxN_t)(const uint16_t *scan, const coeff_t *coeff, intptr_t trSize, uint16_t *absCoeff, const uint8_t *tabSigCtx, uint32_t scanFlagMask, uint8_t *baseCtx, int offset, int scanPosSigOff, int subPosBase);
38
+typedef uint32_t (*costCoeffRemain_t)(uint16_t *absCoeff, int numNonZero, int idx);
39
+typedef uint32_t (*costC1C2Flag_t)(uint16_t *absCoeff, intptr_t numC1Flag, uint8_t *baseCtxMod, intptr_t ctxOffset);
40
+
41
/* Function pointers to optimized encoder primitives. Each pointer can reference
42
* either an assembly routine, a SIMD intrinsic primitive, or a C function */
43
struct EncoderPrimitives
44
45
copy_pp_t copy_pp; // alias to pu[].copy_pp
46
47
var_t var; // block internal variance
48
- pixelcmp_t sse_pp; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
49
- pixelcmp_ss_t sse_ss; // Sum of Square Error (short, short) fenc alignment not assumed
50
+
51
+ pixel_sse_t sse_pp; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
52
+ pixel_sse_ss_t sse_ss; // Sum of Square Error (short, short) fenc alignment not assumed
53
pixelcmp_t psy_cost_pp; // difference in AC energy between two pixel blocks
54
pixelcmp_ss_t psy_cost_ss; // difference in AC energy between two signed residual blocks
55
pixel_ssd_s_t ssd_s; // Sum of Square Error (residual coeff to self)
56
57
saoCuOrgE3_t saoCuOrgE3[2];
58
saoCuOrgB0_t saoCuOrgB0;
59
60
+ saoCuStatsBO_t saoCuStatsBO;
61
+ saoCuStatsE0_t saoCuStatsE0;
62
+ saoCuStatsE1_t saoCuStatsE1;
63
+ saoCuStatsE2_t saoCuStatsE2;
64
+ saoCuStatsE3_t saoCuStatsE3;
65
+
66
downscale_t frameInitLowres;
67
cutree_propagate_cost propagateCost;
68
69
extendCURowBorder_t extendRowBorder;
70
planecopy_cp_t planecopy_cp;
71
planecopy_sp_t planecopy_sp;
72
+ planecopy_sp_t planecopy_sp_shl;
73
74
weightp_sp_t weight_sp;
75
weightp_pp_t weight_pp;
76
77
scanPosLast_t scanPosLast;
78
findPosFirstLast_t findPosFirstLast;
79
80
+ costCoeffNxN_t costCoeffNxN;
81
+ costCoeffRemain_t costCoeffRemain;
82
+ costC1C2Flag_t costC1C2Flag;
83
+
84
+
85
/* There is one set of chroma primitives per color space. An encoder will
86
* have just a single color space and thus it will only ever use one entry
87
* in this array. However we always fill all entries in the array in case
88
89
struct CUChroma
90
{
91
pixelcmp_t sa8d; // if chroma CU is not multiple of 8x8, will use satd
92
- pixelcmp_t sse_pp;
93
+ pixel_sse_t sse_pp;
94
pixel_sub_ps_t sub_ps;
95
pixel_add_ps_t add_ps;
96
97
98
void setupAliasPrimitives(EncoderPrimitives &p);
99
}
100
101
+#if !EXPORT_C_API
102
+extern const int PFX(max_bit_depth);
103
+extern const char* PFX(version_str);
104
+extern const char* PFX(build_info_str);
105
+#endif
106
+
107
#endif // ifndef X265_PRIMITIVES_H
108
x265_1.7.tar.gz/source/common/quant.cpp -> x265_1.8.tar.gz/source/common/quant.cpp
Changed
201
1
2
#include "cudata.h"
3
#include "contexts.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#define SIGN(x,y) ((x^(y >> 31))-(y >> 31))
9
10
11
m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
12
m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
13
m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
14
- m_tqBypass = false;
15
16
return m_resiDctCoeff && m_fencShortBuf;
17
}
18
19
20
void Quant::setQPforQuant(const CUData& ctu, int qp)
21
{
22
- m_tqBypass = !!ctu.m_tqBypass[0];
23
- if (m_tqBypass)
24
- return;
25
m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
26
m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
27
setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
28
29
}
30
31
/* To minimize the distortion only. No rate is considered */
32
-uint32_t Quant::signBitHidingHDQ(int16_t* coeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codeParams)
33
+uint32_t Quant::signBitHidingHDQ(int16_t* coeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codeParams, uint32_t log2TrSize)
34
{
35
- const uint32_t log2TrSizeCG = codeParams.log2TrSizeCG;
36
+ uint32_t trSize = 1 << log2TrSize;
37
const uint16_t* scan = codeParams.scan;
38
- bool lastCG = true;
39
40
- for (int cg = (1 << (log2TrSizeCG * 2)) - 1; cg >= 0; cg--)
41
+ uint8_t coeffNum[MLS_GRP_NUM]; // value range[0, 16]
42
+ uint16_t coeffSign[MLS_GRP_NUM]; // bit mask map for non-zero coeff sign
43
+ uint16_t coeffFlag[MLS_GRP_NUM]; // bit mask map for non-zero coeff
44
+
45
+#if CHECKED_BUILD || _DEBUG
46
+ // clean output buffer, the asm version of scanPosLast Never output anything after latest non-zero coeff group
47
+ memset(coeffNum, 0, sizeof(coeffNum));
48
+ memset(coeffSign, 0, sizeof(coeffNum));
49
+ memset(coeffFlag, 0, sizeof(coeffNum));
50
+#endif
51
+ const int lastScanPos = primitives.scanPosLast(codeParams.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codeParams.scanType], trSize);
52
+ const int cgLastScanPos = (lastScanPos >> LOG2_SCAN_SET_SIZE);
53
+ unsigned long tmp;
54
+
55
+ // first CG need specially processing
56
+ const uint32_t correctOffset = 0x0F & (lastScanPos ^ 0xF);
57
+ coeffFlag[cgLastScanPos] <<= correctOffset;
58
+
59
+ for (int cg = cgLastScanPos; cg >= 0; cg--)
60
{
61
int cgStartPos = cg << LOG2_SCAN_SET_SIZE;
62
int n;
63
64
+#if CHECKED_BUILD || _DEBUG
65
for (n = SCAN_SET_SIZE - 1; n >= 0; --n)
66
if (coeff[scan[n + cgStartPos]])
67
break;
68
- if (n < 0)
69
- continue;
70
+ int lastNZPosInCG0 = n;
71
+#endif
72
73
- int lastNZPosInCG = n;
74
+ if (coeffNum[cg] == 0)
75
+ {
76
+ X265_CHECK(lastNZPosInCG0 < 0, "all zero block check failure\n");
77
+ continue;
78
+ }
79
80
+#if CHECKED_BUILD || _DEBUG
81
for (n = 0;; n++)
82
if (coeff[scan[n + cgStartPos]])
83
break;
84
85
- int firstNZPosInCG = n;
86
+ int firstNZPosInCG0 = n;
87
+#endif
88
+
89
+ CLZ(tmp, coeffFlag[cg]);
90
+ const int firstNZPosInCG = (15 ^ tmp);
91
+
92
+ CTZ(tmp, coeffFlag[cg]);
93
+ const int lastNZPosInCG = (15 ^ tmp);
94
+
95
+ X265_CHECK(firstNZPosInCG0 == firstNZPosInCG, "firstNZPosInCG0 check failure\n");
96
+ X265_CHECK(lastNZPosInCG0 == lastNZPosInCG, "lastNZPosInCG0 check failure\n");
97
98
if (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD)
99
{
100
101
if (signbit != (absSum & 0x1)) // compare signbit with sum_parity
102
{
103
int minCostInc = MAX_INT, minPos = -1, curCost = MAX_INT;
104
- int16_t finalChange = 0, curChange = 0;
105
+ int32_t finalChange = 0, curChange = 0;
106
+ uint32_t cgFlags = coeffFlag[cg];
107
+ if (cg == cgLastScanPos)
108
+ cgFlags >>= correctOffset;
109
110
- for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
111
+ for (n = (cg == cgLastScanPos ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
112
{
113
uint32_t blkPos = scan[n + cgStartPos];
114
- if (coeff[blkPos])
115
+ X265_CHECK(!!coeff[blkPos] == !!(cgFlags & 1), "non zero coeff check failure\n");
116
+
117
+ if (cgFlags & 1)
118
{
119
if (deltaU[blkPos] > 0)
120
{
121
122
}
123
else
124
{
125
- if (n == firstNZPosInCG && abs(coeff[blkPos]) == 1)
126
+ if ((cgFlags == 1) && (abs(coeff[blkPos]) == 1))
127
+ {
128
+ X265_CHECK(n == firstNZPosInCG, "firstNZPosInCG position check failure\n");
129
curCost = MAX_INT;
130
+ }
131
else
132
{
133
curCost = deltaU[blkPos];
134
135
}
136
else
137
{
138
- if (n < firstNZPosInCG)
139
+ if (cgFlags == 0)
140
{
141
+ X265_CHECK(n < firstNZPosInCG, "firstNZPosInCG position check failure\n");
142
uint32_t thisSignBit = m_resiDctCoeff[blkPos] >= 0 ? 0 : 1;
143
if (thisSignBit != signbit)
144
curCost = MAX_INT;
145
146
finalChange = curChange;
147
minPos = blkPos;
148
}
149
+ cgFlags>>=1;
150
}
151
152
/* do not allow change to violate coeff clamp */
153
154
else if (finalChange == -1 && abs(coeff[minPos]) == 1)
155
numSig--;
156
157
- if (m_resiDctCoeff[minPos] >= 0)
158
- coeff[minPos] += finalChange;
159
- else
160
- coeff[minPos] -= finalChange;
161
+ {
162
+ const int16_t sigMask = ((int16_t)m_resiDctCoeff[minPos]) >> 15;
163
+ coeff[minPos] += ((int16_t)finalChange ^ sigMask) - sigMask;
164
+ }
165
}
166
}
167
-
168
- lastCG = false;
169
}
170
171
return numSig;
172
173
coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
174
{
175
const uint32_t sizeIdx = log2TrSize - 2;
176
- if (m_tqBypass)
177
+
178
+ if (cu.m_tqBypass[0])
179
{
180
X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
181
return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
182
183
{
184
TUEntropyCodingParameters codeParams;
185
cu.getTUEntropyCodingParameters(codeParams, absPartIdx, log2TrSize, isLuma);
186
- return signBitHidingHDQ(coeff, deltaU, numSig, codeParams);
187
+ return signBitHidingHDQ(coeff, deltaU, numSig, codeParams, log2TrSize);
188
}
189
else
190
return numSig;
191
}
192
}
193
194
-void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
195
+void Quant::invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
196
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
197
{
198
const uint32_t sizeIdx = log2TrSize - 2;
199
- if (m_tqBypass)
200
+
201
x265_1.7.tar.gz/source/common/quant.h -> x265_1.8.tar.gz/source/common/quant.h
Changed
84
1
2
#include "scalinglist.h"
3
#include "contexts.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class CUData;
10
11
int per;
12
int qp;
13
int64_t lambda2; /* FIX8 */
14
- int32_t lambda; /* FIX8, dynamic range is 18-bits in 8bpp and 20-bits in 16bpp */
15
+ int32_t lambda; /* FIX8, dynamic range is 18-bits in Main and 20-bits in Main10 */
16
17
QpParam() : qp(MAX_INT) {}
18
19
20
/* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32
21
* 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32
22
* Intra 0..7 - Inter 8..15 */
23
- uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
24
- uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
25
+ ALIGN_VAR_16(uint32_t, residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
26
uint32_t count[MAX_NUM_TR_CATEGORIES];
27
+ uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
28
};
29
30
class Quant
31
32
33
NoiseReduction* m_nr;
34
NoiseReduction* m_frameNr; // Array of NR structures, one for each frameEncoder
35
- bool m_tqBypass;
36
37
Quant();
38
~Quant();
39
40
uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
41
uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
42
43
- void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
44
+ void invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
45
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
46
47
/* Pattern decision for context derivation process of significant_coeff_flag */
48
49
const uint32_t sigPos = (uint32_t)(sigCoeffGroupFlag64 >> (cgBlkPos + 1)); // just need lowest 7-bits valid
50
51
// TODO: instruction BT is faster, but _bittest64 still generate instruction 'BT m, r' in VS2012
52
- const uint32_t sigRight = ((int32_t)(cgPosX - (trSizeCG - 1)) >> 31) & (sigPos & 1);
53
- const uint32_t sigLower = ((int32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 2)) & 2;
54
- return sigRight + sigLower;
55
+ const uint32_t sigRight = ((uint32_t)(cgPosX - (trSizeCG - 1)) >> 31) & sigPos;
56
+ const uint32_t sigLower = ((uint32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 1));
57
+ return sigRight + sigLower * 2;
58
}
59
60
/* Context derivation process of coeff_abs_significant_flag */
61
62
X265_CHECK(cgBlkPos < 64, "cgBlkPos is too large\n");
63
// NOTE: unsafe shift operator, see NOTE in calcPatternSigCtx
64
const uint32_t sigPos = (uint32_t)(cgGroupMask >> (cgBlkPos + 1)); // just need lowest 8-bits valid
65
- const uint32_t sigRight = ((int32_t)(cgPosX - (trSizeCG - 1)) >> 31) & sigPos;
66
- const uint32_t sigLower = ((int32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 1));
67
+ const uint32_t sigRight = ((uint32_t)(cgPosX - (trSizeCG - 1)) >> 31) & sigPos;
68
+ const uint32_t sigLower = ((uint32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 1));
69
70
- return (sigRight | sigLower) & 1;
71
+ return (sigRight | sigLower);
72
}
73
74
/* static methods shared with entropy.cpp */
75
76
77
void setChromaQP(int qpin, TextType ttype, int chFmt);
78
79
- uint32_t signBitHidingHDQ(int16_t* qcoeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters);
80
+ uint32_t signBitHidingHDQ(int16_t* qcoeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters, uint32_t log2TrSize);
81
82
uint32_t rdoQuant(const CUData& cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy);
83
};
84
x265_1.7.tar.gz/source/common/scalinglist.cpp -> x265_1.8.tar.gz/source/common/scalinglist.cpp
Changed
37
1
2
},
3
};
4
5
-int quantTSDefault4x4[16] =
6
+static int quantTSDefault4x4[16] =
7
{
8
16, 16, 16, 16,
9
16, 16, 16, 16,
10
11
16, 16, 16, 16
12
};
13
14
-int quantIntraDefault8x8[64] =
15
+static int quantIntraDefault8x8[64] =
16
{
17
16, 16, 16, 16, 17, 18, 21, 24,
18
16, 16, 16, 16, 17, 19, 22, 25,
19
20
24, 25, 29, 36, 47, 65, 88, 115
21
};
22
23
-int quantInterDefault8x8[64] =
24
+static int quantInterDefault8x8[64] =
25
{
26
16, 16, 16, 16, 17, 18, 20, 24,
27
16, 16, 16, 17, 18, 20, 24, 25,
28
29
30
}
31
32
-namespace x265 {
33
+namespace X265_NS {
34
// private namespace
35
36
const int ScalingList::s_numCoefPerSize[NUM_SIZES] = { 16, 64, 256, 1024 };
37
x265_1.7.tar.gz/source/common/scalinglist.h -> x265_1.8.tar.gz/source/common/scalinglist.h
Changed
10
1
2
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class ScalingList
10
x265_1.7.tar.gz/source/common/shortyuv.cpp -> x265_1.8.tar.gz/source/common/shortyuv.cpp
Changed
10
1
2
3
#include "x265.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
ShortYuv::ShortYuv()
9
{
10
x265_1.7.tar.gz/source/common/shortyuv.h -> x265_1.8.tar.gz/source/common/shortyuv.h
Changed
10
1
2
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class Yuv;
10
x265_1.7.tar.gz/source/common/slice.cpp -> x265_1.8.tar.gz/source/common/slice.cpp
Changed
10
1
2
#include "picyuv.h"
3
#include "slice.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
void Slice::setRefPicList(PicList& picList)
9
{
10
x265_1.7.tar.gz/source/common/slice.h -> x265_1.8.tar.gz/source/common/slice.h
Changed
18
1
2
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class Frame;
10
11
bool frameOnlyConstraintFlag;
12
bool profileCompatibilityFlag[32];
13
bool intraConstraintFlag;
14
+ bool onePictureOnlyConstraintFlag;
15
bool lowerBitRateConstraintFlag;
16
int profileIdc;
17
int levelIdc;
18
x265_1.7.tar.gz/source/common/threading.cpp -> x265_1.8.tar.gz/source/common/threading.cpp
Changed
98
1
2
* For more information, contact us at license @ x265.com
3
*****************************************************************************/
4
5
+#include "common.h"
6
#include "threading.h"
7
+#include "cpu.h"
8
9
-namespace x265 {
10
+namespace X265_NS {
11
// x265 private namespace
12
13
#if X265_ARCH_X86 && !defined(X86_64) && ENABLE_ASSEMBLY && defined(__GNUC__)
14
-extern "C" intptr_t x265_stack_align(void (*func)(), ...);
15
-#define x265_stack_align(func, ...) x265_stack_align((void (*)())func, __VA_ARGS__)
16
+extern "C" intptr_t PFX(stack_align)(void (*func)(), ...);
17
+#define STACK_ALIGN(func, ...) PFX(stack_align)((void (*)())func, __VA_ARGS__)
18
#else
19
-#define x265_stack_align(func, ...) func(__VA_ARGS__)
20
+#define STACK_ALIGN(func, ...) func(__VA_ARGS__)
21
+#endif
22
+
23
+#if NO_ATOMICS
24
+pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
25
+
26
+int no_atomic_or(int* ptr, int mask)
27
+{
28
+ pthread_mutex_lock(&g_mutex);
29
+ int ret = *ptr;
30
+ *ptr |= mask;
31
+ pthread_mutex_unlock(&g_mutex);
32
+ return ret;
33
+}
34
+
35
+int no_atomic_and(int* ptr, int mask)
36
+{
37
+ pthread_mutex_lock(&g_mutex);
38
+ int ret = *ptr;
39
+ *ptr &= mask;
40
+ pthread_mutex_unlock(&g_mutex);
41
+ return ret;
42
+}
43
+
44
+int no_atomic_inc(int* ptr)
45
+{
46
+ pthread_mutex_lock(&g_mutex);
47
+ *ptr += 1;
48
+ int ret = *ptr;
49
+ pthread_mutex_unlock(&g_mutex);
50
+ return ret;
51
+}
52
+
53
+int no_atomic_dec(int* ptr)
54
+{
55
+ pthread_mutex_lock(&g_mutex);
56
+ *ptr -= 1;
57
+ int ret = *ptr;
58
+ pthread_mutex_unlock(&g_mutex);
59
+ return ret;
60
+}
61
+
62
+int no_atomic_add(int* ptr, int val)
63
+{
64
+ pthread_mutex_lock(&g_mutex);
65
+ *ptr += val;
66
+ int ret = *ptr;
67
+ pthread_mutex_unlock(&g_mutex);
68
+ return ret;
69
+}
70
#endif
71
72
/* C shim for forced stack alignment */
73
static void stackAlignMain(Thread *instance)
74
{
75
+ // defer processing to the virtual function implemented in the derived class
76
instance->threadMain();
77
}
78
79
80
81
static DWORD WINAPI ThreadShim(Thread *instance)
82
{
83
- // defer processing to the virtual function implemented in the derived class
84
- x265_stack_align(stackAlignMain, instance);
85
+ STACK_ALIGN(stackAlignMain, instance);
86
87
return 0;
88
}
89
90
// defer processing to the virtual function implemented in the derived class
91
Thread *instance = reinterpret_cast<Thread *>(opaque);
92
93
- x265_stack_align(stackAlignMain, instance);
94
+ STACK_ALIGN(stackAlignMain, instance);
95
96
return NULL;
97
}
98
x265_1.7.tar.gz/source/common/threading.h -> x265_1.8.tar.gz/source/common/threading.h
Changed
50
1
2
#include <sys/sysctl.h>
3
#endif
4
5
-#ifdef __GNUC__ /* GCCs builtin atomics */
6
+#if NO_ATOMICS
7
+
8
+#include <sys/time.h>
9
+#include <unistd.h>
10
+
11
+namespace X265_NS {
12
+// x265 private namespace
13
+int no_atomic_or(int* ptr, int mask);
14
+int no_atomic_and(int* ptr, int mask);
15
+int no_atomic_inc(int* ptr);
16
+int no_atomic_dec(int* ptr);
17
+int no_atomic_add(int* ptr, int val);
18
+}
19
+
20
+#define CLZ(id, x) id = (unsigned long)__builtin_clz(x) ^ 31
21
+#define CTZ(id, x) id = (unsigned long)__builtin_ctz(x)
22
+#define ATOMIC_OR(ptr, mask) no_atomic_or((int*)ptr, mask)
23
+#define ATOMIC_AND(ptr, mask) no_atomic_and((int*)ptr, mask)
24
+#define ATOMIC_INC(ptr) no_atomic_inc((int*)ptr)
25
+#define ATOMIC_DEC(ptr) no_atomic_dec((int*)ptr)
26
+#define ATOMIC_ADD(ptr, val) no_atomic_add((int*)ptr, val)
27
+#define GIVE_UP_TIME() usleep(0)
28
+
29
+#elif __GNUC__ /* GCCs builtin atomics */
30
31
#include <sys/time.h>
32
#include <unistd.h>
33
34
35
#endif // ifdef __GNUC__
36
37
-namespace x265 {
38
+namespace X265_NS {
39
// x265 private namespace
40
41
#ifdef _WIN32
42
43
44
void stop();
45
};
46
-} // end namespace x265
47
+} // end namespace X265_NS
48
49
#endif // ifndef X265_THREADING_H
50
x265_1.7.tar.gz/source/common/threadpool.cpp -> x265_1.8.tar.gz/source/common/threadpool.cpp
Changed
25
1
2
#include <numa.h>
3
#endif
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
class WorkerThread : public Thread
10
11
ThreadPool *pools = new ThreadPool[numPools];
12
if (pools)
13
{
14
- int maxProviders = (p->frameNumThreads + 1 + numPools - 1) / numPools; /* +1 is Lookahead */
15
+ int maxProviders = (p->frameNumThreads + numPools - 1) / numPools + 1; /* +1 is Lookahead, always assigned to threadpool 0 */
16
int node = 0;
17
for (int i = 0; i < numPools; i++)
18
{
19
20
#endif
21
}
22
23
-} // end namespace x265
24
+} // end namespace X265_NS
25
x265_1.7.tar.gz/source/common/threadpool.h -> x265_1.8.tar.gz/source/common/threadpool.h
Changed
27
1
2
#include "common.h"
3
#include "threading.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
class ThreadPool;
10
11
* called. If it returns non-zero then some number of slave worker threads are
12
* already in the process of calling your processTasks() function. The master
13
* thread should participate and call processTasks() itself. When
14
- * waitForExit() returns, all bonded peer threads are quarunteed to have
15
+ * waitForExit() returns, all bonded peer threads are guaranteed to have
16
* exitied processTasks(). Since the thread count is small, it uses explicit
17
* locking instead of atomic counters and bitmasks */
18
class BondedTaskGroup
19
20
virtual void processTasks(int workerThreadId) = 0;
21
};
22
23
-} // end namespace x265
24
+} // end namespace X265_NS
25
26
#endif // ifndef X265_THREADPOOL_H
27
x265_1.7.tar.gz/source/common/vec/dct-sse3.cpp -> x265_1.8.tar.gz/source/common/vec/dct-sse3.cpp
Changed
63
1
2
#include <xmmintrin.h> // SSE
3
#include <pmmintrin.h> // SSE3
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
-namespace {
9
#define SHIFT1 7
10
#define ADD1 64
11
12
-#if HIGH_BIT_DEPTH
13
-#define SHIFT2 10
14
-#define ADD2 512
15
-#else
16
-#define SHIFT2 12
17
-#define ADD2 2048
18
-#endif
19
+#define SHIFT2 (12 - (X265_DEPTH - 8))
20
+#define ADD2 (1 << ((SHIFT2) - 1))
21
22
ALIGN_VAR_32(static const int16_t, tab_idct_8x8[12][8]) =
23
{
24
25
{ 83, 36, 83, 36, 83, 36, 83, 36 },
26
{ 36, -83, 36, -83, 36, -83, 36, -83 }
27
};
28
-void idct8(const int16_t* src, int16_t* dst, intptr_t stride)
29
+
30
+static void idct8(const int16_t* src, int16_t* dst, intptr_t stride)
31
{
32
__m128i m128iS0, m128iS1, m128iS2, m128iS3, m128iS4, m128iS5, m128iS6, m128iS7, m128iAdd, m128Tmp0, m128Tmp1, m128Tmp2, m128Tmp3, E0h, E1h, E2h, E3h, E0l, E1l, E2l, E3l, O0h, O1h, O2h, O3h, O0l, O1l, O2l, O3l, EE0l, EE1l, E00l, E01l, EE0h, EE1h, E00h, E01h;
33
__m128i T00, T01, T02, T03, T04, T05, T06, T07;
34
35
_mm_storeh_pi((__m64*)&dst[7 * stride + 4], _mm_castsi128_ps(T11));
36
}
37
38
-void idct16(const int16_t *src, int16_t *dst, intptr_t stride)
39
+static void idct16(const int16_t *src, int16_t *dst, intptr_t stride)
40
{
41
#define READ_UNPACKHILO(offset)\
42
const __m128i T_00_00A = _mm_unpacklo_epi16(*(__m128i*)&src[1 * 16 + offset], *(__m128i*)&src[3 * 16 + offset]);\
43
44
#undef UNPACKHILO
45
#undef READ_UNPACKHILO
46
47
-void idct32(const int16_t *src, int16_t *dst, intptr_t stride)
48
+static void idct32(const int16_t *src, int16_t *dst, intptr_t stride)
49
{
50
//Odd
51
const __m128i c16_p90_p90 = _mm_set1_epi32(0x005A005A); //column 0
52
53
}
54
}
55
56
-}
57
-
58
-namespace x265 {
59
+namespace X265_NS {
60
void setupIntrinsicDCT_sse3(EncoderPrimitives &p)
61
{
62
/* Note: We have AVX2 assembly for these functions, but since AVX2 is still
63
x265_1.7.tar.gz/source/common/vec/dct-sse41.cpp -> x265_1.8.tar.gz/source/common/vec/dct-sse41.cpp
Changed
25
1
2
#include <xmmintrin.h> // SSE
3
#include <smmintrin.h> // SSE4.1
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
-namespace {
9
-void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int16_t* coef, int num, int per, int shift)
10
+static void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int16_t* coef, int num, int per, int shift)
11
{
12
X265_CHECK(num <= 32 * 32, "dequant num too large\n");
13
14
15
}
16
}
17
}
18
-}
19
20
-namespace x265 {
21
+namespace X265_NS {
22
void setupIntrinsicDCT_sse41(EncoderPrimitives &p)
23
{
24
p.dequant_scaling = dequant_scaling;
25
x265_1.7.tar.gz/source/common/vec/dct-ssse3.cpp -> x265_1.8.tar.gz/source/common/vec/dct-ssse3.cpp
Changed
201
1
2
#include <pmmintrin.h> // SSE3
3
#include <tmmintrin.h> // SSSE3
4
5
-using namespace x265;
6
+#define DCT16_SHIFT1 (3 + X265_DEPTH - 8)
7
+#define DCT16_ADD1 (1 << ((DCT16_SHIFT1) - 1))
8
+
9
+#define DCT16_SHIFT2 10
10
+#define DCT16_ADD2 (1 << ((DCT16_SHIFT2) - 1))
11
+
12
+#define DCT32_SHIFT1 (DCT16_SHIFT1 + 1)
13
+#define DCT32_ADD1 (1 << ((DCT32_SHIFT1) - 1))
14
+
15
+#define DCT32_SHIFT2 (DCT16_SHIFT2 + 1)
16
+#define DCT32_ADD2 (1 << ((DCT32_SHIFT2) - 1))
17
+
18
+using namespace X265_NS;
19
20
-namespace {
21
ALIGN_VAR_32(static const int16_t, tab_dct_8[][8]) =
22
{
23
{ 0x0100, 0x0F0E, 0x0706, 0x0908, 0x0302, 0x0D0C, 0x0504, 0x0B0A },
24
25
#undef MAKE_COEF
26
};
27
28
-void dct16(const int16_t *src, int16_t *dst, intptr_t stride)
29
+static void dct16(const int16_t *src, int16_t *dst, intptr_t stride)
30
{
31
-#if HIGH_BIT_DEPTH
32
-#define SHIFT1 5
33
-#define ADD1 16
34
-#else
35
-#define SHIFT1 3
36
-#define ADD1 4
37
-#endif
38
-
39
-#define SHIFT2 10
40
-#define ADD2 512
41
-
42
// Const
43
- __m128i c_4 = _mm_set1_epi32(ADD1);
44
- __m128i c_512 = _mm_set1_epi32(ADD2);
45
+ __m128i c_4 = _mm_set1_epi32(DCT16_ADD1);
46
+ __m128i c_512 = _mm_set1_epi32(DCT16_ADD2);
47
48
int i;
49
50
51
52
T60 = _mm_madd_epi16(T50, _mm_load_si128((__m128i*)tab_dct_8[1]));
53
T61 = _mm_madd_epi16(T51, _mm_load_si128((__m128i*)tab_dct_8[1]));
54
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
55
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
56
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
57
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
58
T70 = _mm_packs_epi32(T60, T61);
59
_mm_store_si128((__m128i*)&tmp[0 * 16 + i], T70);
60
61
T60 = _mm_madd_epi16(T50, _mm_load_si128((__m128i*)tab_dct_8[2]));
62
T61 = _mm_madd_epi16(T51, _mm_load_si128((__m128i*)tab_dct_8[2]));
63
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
64
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
65
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
66
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
67
T70 = _mm_packs_epi32(T60, T61);
68
_mm_store_si128((__m128i*)&tmp[8 * 16 + i], T70);
69
70
T60 = _mm_madd_epi16(T52, _mm_load_si128((__m128i*)tab_dct_8[3]));
71
T61 = _mm_madd_epi16(T53, _mm_load_si128((__m128i*)tab_dct_8[3]));
72
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
73
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
74
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
75
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
76
T70 = _mm_packs_epi32(T60, T61);
77
_mm_store_si128((__m128i*)&tmp[4 * 16 + i], T70);
78
79
T60 = _mm_madd_epi16(T52, _mm_load_si128((__m128i*)tab_dct_8[4]));
80
T61 = _mm_madd_epi16(T53, _mm_load_si128((__m128i*)tab_dct_8[4]));
81
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
82
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
83
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
84
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
85
T70 = _mm_packs_epi32(T60, T61);
86
_mm_store_si128((__m128i*)&tmp[12 * 16 + i], T70);
87
88
89
T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[5]));
90
T60 = _mm_hadd_epi32(T60, T61);
91
T61 = _mm_hadd_epi32(T62, T63);
92
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
93
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
94
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
95
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
96
T70 = _mm_packs_epi32(T60, T61);
97
_mm_store_si128((__m128i*)&tmp[2 * 16 + i], T70);
98
99
100
T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[6]));
101
T60 = _mm_hadd_epi32(T60, T61);
102
T61 = _mm_hadd_epi32(T62, T63);
103
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
104
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
105
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
106
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
107
T70 = _mm_packs_epi32(T60, T61);
108
_mm_store_si128((__m128i*)&tmp[6 * 16 + i], T70);
109
110
111
T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[7]));
112
T60 = _mm_hadd_epi32(T60, T61);
113
T61 = _mm_hadd_epi32(T62, T63);
114
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
115
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
116
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
117
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
118
T70 = _mm_packs_epi32(T60, T61);
119
_mm_store_si128((__m128i*)&tmp[10 * 16 + i], T70);
120
121
122
T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[8]));
123
T60 = _mm_hadd_epi32(T60, T61);
124
T61 = _mm_hadd_epi32(T62, T63);
125
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1);
126
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1);
127
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
128
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
129
T70 = _mm_packs_epi32(T60, T61);
130
_mm_store_si128((__m128i*)&tmp[14 * 16 + i], T70);
131
132
133
T63 = _mm_hadd_epi32(T66, T67); \
134
T60 = _mm_hadd_epi32(T60, T61); \
135
T61 = _mm_hadd_epi32(T62, T63); \
136
- T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), SHIFT1); \
137
- T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), SHIFT1); \
138
+ T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1); \
139
+ T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1); \
140
T70 = _mm_packs_epi32(T60, T61); \
141
_mm_store_si128((__m128i*)&tmp[(dstPos) * 16 + i], T70);
142
143
144
145
T40 = _mm_hadd_epi32(T30, T31);
146
T41 = _mm_hsub_epi32(T30, T31);
147
- T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), SHIFT2);
148
- T41 = _mm_srai_epi32(_mm_add_epi32(T41, c_512), SHIFT2);
149
+ T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
150
+ T41 = _mm_srai_epi32(_mm_add_epi32(T41, c_512), DCT16_SHIFT2);
151
T40 = _mm_packs_epi32(T40, T40);
152
T41 = _mm_packs_epi32(T41, T41);
153
_mm_storel_epi64((__m128i*)&dst[0 * 16 + i], T40);
154
155
T31 = _mm_hadd_epi32(T32, T33);
156
157
T40 = _mm_hadd_epi32(T30, T31);
158
- T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), SHIFT2);
159
+ T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
160
T40 = _mm_packs_epi32(T40, T40);
161
_mm_storel_epi64((__m128i*)&dst[4 * 16 + i], T40);
162
163
164
T31 = _mm_hadd_epi32(T32, T33);
165
166
T40 = _mm_hadd_epi32(T30, T31);
167
- T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), SHIFT2);
168
+ T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
169
T40 = _mm_packs_epi32(T40, T40);
170
_mm_storel_epi64((__m128i*)&dst[12 * 16 + i], T40);
171
172
173
T31 = _mm_hadd_epi32(T32, T33);
174
175
T40 = _mm_hadd_epi32(T30, T31);
176
- T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), SHIFT2);
177
+ T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
178
T40 = _mm_packs_epi32(T40, T40);
179
_mm_storel_epi64((__m128i*)&dst[2 * 16 + i], T40);
180
181
182
T31 = _mm_hadd_epi32(T32, T33);
183
184
T40 = _mm_hadd_epi32(T30, T31);
185
- T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), SHIFT2);
186
+ T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
187
T40 = _mm_packs_epi32(T40, T40);
188
_mm_storel_epi64((__m128i*)&dst[6 * 16 + i], T40);
189
190
191
T31 = _mm_hadd_epi32(T32, T33);
192
193
T40 = _mm_hadd_epi32(T30, T31);
194
- T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), SHIFT2);
195
+ T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
196
T40 = _mm_packs_epi32(T40, T40);
197
_mm_storel_epi64((__m128i*)&dst[10 * 16 + i], T40);
198
199
200
T31 = _mm_hadd_epi32(T32, T33);
201
x265_1.7.tar.gz/source/common/vec/vec-primitives.cpp -> x265_1.8.tar.gz/source/common/vec/vec-primitives.cpp
Changed
26
1
2
#define HAVE_SSE4
3
#define HAVE_AVX2
4
#elif defined(__GNUC__)
5
-#if __clang__ || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 3)
6
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
7
+#if __clang__ || GCC_VERSION >= 40300 /* gcc_version >= gcc-4.3.0 */
8
#define HAVE_SSE3
9
#define HAVE_SSSE3
10
#define HAVE_SSE4
11
#endif
12
-#if __clang__ || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 7)
13
+#if __clang__ || GCC_VERSION >= 40700 /* gcc_version >= gcc-4.7.0 */
14
#define HAVE_AVX2
15
#endif
16
#elif defined(_MSC_VER)
17
18
#endif // compiler checks
19
#endif // if X265_ARCH_X86
20
21
-namespace x265 {
22
+namespace X265_NS {
23
// private x265 namespace
24
25
void setupIntrinsicDCT_sse3(EncoderPrimitives&);
26
x265_1.7.tar.gz/source/common/version.cpp -> x265_1.8.tar.gz/source/common/version.cpp
Changed
138
1
2
3
#include "x265.h"
4
#include "common.h"
5
+#include "primitives.h"
6
7
#define XSTR(x) STR(x)
8
#define STR(x) #x
9
10
#if defined(__clang__)
11
-#define NVM_COMPILEDBY "[clang " XSTR(__clang_major__) "." XSTR(__clang_minor__) "." XSTR(__clang_patchlevel__) "]"
12
+#define COMPILEDBY "[clang " XSTR(__clang_major__) "." XSTR(__clang_minor__) "." XSTR(__clang_patchlevel__) "]"
13
#ifdef __IA64__
14
-#define NVM_ONARCH "[on 64-bit] "
15
+#define ONARCH "[on 64-bit] "
16
#else
17
-#define NVM_ONARCH "[on 32-bit] "
18
+#define ONARCH "[on 32-bit] "
19
#endif
20
#endif
21
22
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
23
-#define NVM_COMPILEDBY "[GCC " XSTR(__GNUC__) "." XSTR(__GNUC_MINOR__) "." XSTR(__GNUC_PATCHLEVEL__) "]"
24
+#define COMPILEDBY "[GCC " XSTR(__GNUC__) "." XSTR(__GNUC_MINOR__) "." XSTR(__GNUC_PATCHLEVEL__) "]"
25
#ifdef __IA64__
26
-#define NVM_ONARCH "[on 64-bit] "
27
+#define ONARCH "[on 64-bit] "
28
#else
29
-#define NVM_ONARCH "[on 32-bit] "
30
+#define ONARCH "[on 32-bit] "
31
#endif
32
#endif
33
34
#ifdef __INTEL_COMPILER
35
-#define NVM_COMPILEDBY "[ICC " XSTR(__INTEL_COMPILER) "]"
36
+#define COMPILEDBY "[ICC " XSTR(__INTEL_COMPILER) "]"
37
#elif _MSC_VER
38
-#define NVM_COMPILEDBY "[MSVC " XSTR(_MSC_VER) "]"
39
+#define COMPILEDBY "[MSVC " XSTR(_MSC_VER) "]"
40
#endif
41
42
-#ifndef NVM_COMPILEDBY
43
-#define NVM_COMPILEDBY "[Unk-CXX]"
44
+#ifndef COMPILEDBY
45
+#define COMPILEDBY "[Unk-CXX]"
46
#endif
47
48
#ifdef _WIN32
49
-#define NVM_ONOS "[Windows]"
50
+#define ONOS "[Windows]"
51
#elif __linux
52
-#define NVM_ONOS "[Linux]"
53
+#define ONOS "[Linux]"
54
#elif __OpenBSD__
55
-#define NVM_ONOS "[OpenBSD]"
56
+#define ONOS "[OpenBSD]"
57
#elif __CYGWIN__
58
-#define NVM_ONOS "[Cygwin]"
59
+#define ONOS "[Cygwin]"
60
#elif __APPLE__
61
-#define NVM_ONOS "[Mac OS X]"
62
+#define ONOS "[Mac OS X]"
63
#else
64
-#define NVM_ONOS "[Unk-OS]"
65
+#define ONOS "[Unk-OS]"
66
#endif
67
68
#if X86_64
69
-#define NVM_BITS "[64 bit]"
70
+#define BITS "[64 bit]"
71
#else
72
-#define NVM_BITS "[32 bit]"
73
+#define BITS "[32 bit]"
74
+#endif
75
+
76
+#if defined(ENABLE_ASSEMBLY)
77
+#define ASM ""
78
+#else
79
+#define ASM "[noasm]"
80
+#endif
81
+
82
+#if NO_ATOMICS
83
+#define ATOMICS "[no-atomics]"
84
+#else
85
+#define ATOMICS ""
86
#endif
87
88
#if CHECKED_BUILD
89
-#define CHECKED "[CHECKED] "
90
+#define CHECKED "[CHECKED] "
91
#else
92
-#define CHECKED " "
93
+#define CHECKED " "
94
#endif
95
96
-#if HIGH_BIT_DEPTH
97
-#define BITDEPTH "16bpp"
98
-const int x265_max_bit_depth = 10;
99
+#if X265_DEPTH == 12
100
+
101
+#define BITDEPTH "12bit"
102
+const int PFX(max_bit_depth) = 12;
103
+
104
+#elif X265_DEPTH == 10
105
+
106
+#define BITDEPTH "10bit"
107
+const int PFX(max_bit_depth) = 10;
108
+
109
+#elif X265_DEPTH == 8
110
+
111
+#define BITDEPTH "8bit"
112
+const int PFX(max_bit_depth) = 8;
113
+
114
+#endif
115
+
116
+#if LINKED_8BIT
117
+#define ADD8 "+8bit"
118
+#else
119
+#define ADD8 ""
120
+#endif
121
+#if LINKED_10BIT
122
+#define ADD10 "+10bit"
123
+#else
124
+#define ADD10 ""
125
+#endif
126
+#if LINKED_12BIT
127
+#define ADD12 "+12bit"
128
#else
129
-#define BITDEPTH "8bpp"
130
-const int x265_max_bit_depth = 8;
131
+#define ADD12 ""
132
#endif
133
134
-const char *x265_version_str = XSTR(X265_VERSION);
135
-const char *x265_build_info_str = NVM_ONOS NVM_COMPILEDBY NVM_BITS CHECKED BITDEPTH;
136
+const char* PFX(version_str) = XSTR(X265_VERSION);
137
+const char* PFX(build_info_str) = ONOS COMPILEDBY BITS ASM ATOMICS CHECKED BITDEPTH ADD8 ADD10 ADD12;
138
x265_1.7.tar.gz/source/common/wavefront.cpp -> x265_1.8.tar.gz/source/common/wavefront.cpp
Changed
10
1
2
#include "wavefront.h"
3
#include "common.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
bool WaveFront::init(int numRows)
10
x265_1.7.tar.gz/source/common/wavefront.h -> x265_1.8.tar.gz/source/common/wavefront.h
Changed
18
1
2
#include "common.h"
3
#include "threadpool.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
// Generic wave-front scheduler, manages busy-state of CU rows as a priority
10
11
// derived classes.
12
virtual void processRow(int row, int threadId) = 0;
13
};
14
-} // end namespace x265
15
+} // end namespace X265_NS
16
17
#endif // ifndef X265_WAVEFRONT_H
18
x265_1.7.tar.gz/source/common/winxp.cpp -> x265_1.8.tar.gz/source/common/winxp.cpp
Changed
19
1
2
3
#if defined(_WIN32) && (_WIN32_WINNT < 0x0600) // _WIN32_WINNT_VISTA
4
5
-namespace x265 {
6
+namespace X265_NS {
7
/* Mimic CONDITION_VARIABLE functions only supported on Vista+ */
8
9
int WINAPI cond_init(ConditionVariable *cond)
10
11
DeleteCriticalSection(&cond->broadcastMutex);
12
DeleteCriticalSection(&cond->waiterCountMutex);
13
}
14
-} // namespace x265
15
+} // namespace X265_NS
16
17
#elif defined(_MSC_VER)
18
19
x265_1.7.tar.gz/source/common/winxp.h -> x265_1.8.tar.gz/source/common/winxp.h
Changed
32
1
2
#include <intrin.h> // _InterlockedCompareExchange64
3
#endif
4
5
-namespace x265 {
6
+namespace X265_NS {
7
/* non-native condition variable */
8
typedef struct
9
{
10
11
void cond_destroy(ConditionVariable *cond);
12
13
/* map missing API symbols to our structure and functions */
14
-#define CONDITION_VARIABLE x265::ConditionVariable
15
-#define InitializeConditionVariable x265::cond_init
16
-#define SleepConditionVariableCS x265::cond_wait
17
-#define WakeConditionVariable x265::cond_signal
18
-#define WakeAllConditionVariable x265::cond_broadcast
19
-#define XP_CONDITION_VAR_FREE x265::cond_destroy
20
+#define CONDITION_VARIABLE X265_NS::ConditionVariable
21
+#define InitializeConditionVariable X265_NS::cond_init
22
+#define SleepConditionVariableCS X265_NS::cond_wait
23
+#define WakeConditionVariable X265_NS::cond_signal
24
+#define WakeAllConditionVariable X265_NS::cond_broadcast
25
+#define XP_CONDITION_VAR_FREE X265_NS::cond_destroy
26
27
-} // namespace x265
28
+} // namespace X265_NS
29
30
#else // if defined(_WIN32) && (_WIN32_WINNT < 0x0600)
31
32
x265_1.7.tar.gz/source/common/x86/asm-primitives.cpp -> x265_1.8.tar.gz/source/common/x86/asm-primitives.cpp
Changed
201
1
2
#include "x265.h"
3
#include "cpu.h"
4
5
+#define FUNCDEF_TU(ret, name, cpu, ...) \
6
+ ret PFX(name ## _4x4_ ## cpu(__VA_ARGS__)); \
7
+ ret PFX(name ## _8x8_ ## cpu(__VA_ARGS__)); \
8
+ ret PFX(name ## _16x16_ ## cpu(__VA_ARGS__)); \
9
+ ret PFX(name ## _32x32_ ## cpu(__VA_ARGS__)); \
10
+ ret PFX(name ## _64x64_ ## cpu(__VA_ARGS__))
11
+
12
+#define FUNCDEF_TU_S(ret, name, cpu, ...) \
13
+ ret PFX(name ## _4_ ## cpu(__VA_ARGS__)); \
14
+ ret PFX(name ## _8_ ## cpu(__VA_ARGS__)); \
15
+ ret PFX(name ## _16_ ## cpu(__VA_ARGS__)); \
16
+ ret PFX(name ## _32_ ## cpu(__VA_ARGS__)); \
17
+ ret PFX(name ## _64_ ## cpu(__VA_ARGS__))
18
+
19
+#define FUNCDEF_TU_S2(ret, name, cpu, ...) \
20
+ ret PFX(name ## 4_ ## cpu(__VA_ARGS__)); \
21
+ ret PFX(name ## 8_ ## cpu(__VA_ARGS__)); \
22
+ ret PFX(name ## 16_ ## cpu(__VA_ARGS__)); \
23
+ ret PFX(name ## 32_ ## cpu(__VA_ARGS__)); \
24
+ ret PFX(name ## 64_ ## cpu(__VA_ARGS__))
25
+
26
+#define FUNCDEF_PU(ret, name, cpu, ...) \
27
+ ret PFX(name ## _4x4_ ## cpu)(__VA_ARGS__); \
28
+ ret PFX(name ## _8x8_ ## cpu)(__VA_ARGS__); \
29
+ ret PFX(name ## _16x16_ ## cpu)(__VA_ARGS__); \
30
+ ret PFX(name ## _32x32_ ## cpu)(__VA_ARGS__); \
31
+ ret PFX(name ## _64x64_ ## cpu)(__VA_ARGS__); \
32
+ ret PFX(name ## _8x4_ ## cpu)(__VA_ARGS__); \
33
+ ret PFX(name ## _4x8_ ## cpu)(__VA_ARGS__); \
34
+ ret PFX(name ## _16x8_ ## cpu)(__VA_ARGS__); \
35
+ ret PFX(name ## _8x16_ ## cpu)(__VA_ARGS__); \
36
+ ret PFX(name ## _16x32_ ## cpu)(__VA_ARGS__); \
37
+ ret PFX(name ## _32x16_ ## cpu)(__VA_ARGS__); \
38
+ ret PFX(name ## _64x32_ ## cpu)(__VA_ARGS__); \
39
+ ret PFX(name ## _32x64_ ## cpu)(__VA_ARGS__); \
40
+ ret PFX(name ## _16x12_ ## cpu)(__VA_ARGS__); \
41
+ ret PFX(name ## _12x16_ ## cpu)(__VA_ARGS__); \
42
+ ret PFX(name ## _16x4_ ## cpu)(__VA_ARGS__); \
43
+ ret PFX(name ## _4x16_ ## cpu)(__VA_ARGS__); \
44
+ ret PFX(name ## _32x24_ ## cpu)(__VA_ARGS__); \
45
+ ret PFX(name ## _24x32_ ## cpu)(__VA_ARGS__); \
46
+ ret PFX(name ## _32x8_ ## cpu)(__VA_ARGS__); \
47
+ ret PFX(name ## _8x32_ ## cpu)(__VA_ARGS__); \
48
+ ret PFX(name ## _64x48_ ## cpu)(__VA_ARGS__); \
49
+ ret PFX(name ## _48x64_ ## cpu)(__VA_ARGS__); \
50
+ ret PFX(name ## _64x16_ ## cpu)(__VA_ARGS__); \
51
+ ret PFX(name ## _16x64_ ## cpu)(__VA_ARGS__)
52
+
53
+#define FUNCDEF_CHROMA_PU(ret, name, cpu, ...) \
54
+ FUNCDEF_PU(ret, name, cpu, __VA_ARGS__); \
55
+ ret PFX(name ## _4x2_ ## cpu)(__VA_ARGS__); \
56
+ ret PFX(name ## _2x4_ ## cpu)(__VA_ARGS__); \
57
+ ret PFX(name ## _8x2_ ## cpu)(__VA_ARGS__); \
58
+ ret PFX(name ## _2x8_ ## cpu)(__VA_ARGS__); \
59
+ ret PFX(name ## _8x6_ ## cpu)(__VA_ARGS__); \
60
+ ret PFX(name ## _6x8_ ## cpu)(__VA_ARGS__); \
61
+ ret PFX(name ## _8x12_ ## cpu)(__VA_ARGS__); \
62
+ ret PFX(name ## _12x8_ ## cpu)(__VA_ARGS__); \
63
+ ret PFX(name ## _6x16_ ## cpu)(__VA_ARGS__); \
64
+ ret PFX(name ## _16x6_ ## cpu)(__VA_ARGS__); \
65
+ ret PFX(name ## _2x16_ ## cpu)(__VA_ARGS__); \
66
+ ret PFX(name ## _16x2_ ## cpu)(__VA_ARGS__); \
67
+ ret PFX(name ## _4x12_ ## cpu)(__VA_ARGS__); \
68
+ ret PFX(name ## _12x4_ ## cpu)(__VA_ARGS__); \
69
+ ret PFX(name ## _32x12_ ## cpu)(__VA_ARGS__); \
70
+ ret PFX(name ## _12x32_ ## cpu)(__VA_ARGS__); \
71
+ ret PFX(name ## _32x4_ ## cpu)(__VA_ARGS__); \
72
+ ret PFX(name ## _4x32_ ## cpu)(__VA_ARGS__); \
73
+ ret PFX(name ## _32x48_ ## cpu)(__VA_ARGS__); \
74
+ ret PFX(name ## _48x32_ ## cpu)(__VA_ARGS__); \
75
+ ret PFX(name ## _16x24_ ## cpu)(__VA_ARGS__); \
76
+ ret PFX(name ## _24x16_ ## cpu)(__VA_ARGS__); \
77
+ ret PFX(name ## _8x64_ ## cpu)(__VA_ARGS__); \
78
+ ret PFX(name ## _64x8_ ## cpu)(__VA_ARGS__); \
79
+ ret PFX(name ## _64x24_ ## cpu)(__VA_ARGS__); \
80
+ ret PFX(name ## _24x64_ ## cpu)(__VA_ARGS__);
81
+
82
extern "C" {
83
#include "pixel.h"
84
#include "pixel-util.h"
85
86
}
87
88
#define ALL_LUMA_CU_TYPED(prim, fncdef, fname, cpu) \
89
- p.cu[BLOCK_8x8].prim = fncdef x265_ ## fname ## _8x8_ ## cpu; \
90
- p.cu[BLOCK_16x16].prim = fncdef x265_ ## fname ## _16x16_ ## cpu; \
91
- p.cu[BLOCK_32x32].prim = fncdef x265_ ## fname ## _32x32_ ## cpu; \
92
- p.cu[BLOCK_64x64].prim = fncdef x265_ ## fname ## _64x64_ ## cpu
93
+ p.cu[BLOCK_8x8].prim = fncdef PFX(fname ## _8x8_ ## cpu); \
94
+ p.cu[BLOCK_16x16].prim = fncdef PFX(fname ## _16x16_ ## cpu); \
95
+ p.cu[BLOCK_32x32].prim = fncdef PFX(fname ## _32x32_ ## cpu); \
96
+ p.cu[BLOCK_64x64].prim = fncdef PFX(fname ## _64x64_ ## cpu)
97
#define ALL_LUMA_CU_TYPED_S(prim, fncdef, fname, cpu) \
98
- p.cu[BLOCK_8x8].prim = fncdef x265_ ## fname ## 8_ ## cpu; \
99
- p.cu[BLOCK_16x16].prim = fncdef x265_ ## fname ## 16_ ## cpu; \
100
- p.cu[BLOCK_32x32].prim = fncdef x265_ ## fname ## 32_ ## cpu; \
101
- p.cu[BLOCK_64x64].prim = fncdef x265_ ## fname ## 64_ ## cpu
102
+ p.cu[BLOCK_8x8].prim = fncdef PFX(fname ## 8_ ## cpu); \
103
+ p.cu[BLOCK_16x16].prim = fncdef PFX(fname ## 16_ ## cpu); \
104
+ p.cu[BLOCK_32x32].prim = fncdef PFX(fname ## 32_ ## cpu); \
105
+ p.cu[BLOCK_64x64].prim = fncdef PFX(fname ## 64_ ## cpu)
106
#define ALL_LUMA_TU_TYPED(prim, fncdef, fname, cpu) \
107
- p.cu[BLOCK_4x4].prim = fncdef x265_ ## fname ## _4x4_ ## cpu; \
108
- p.cu[BLOCK_8x8].prim = fncdef x265_ ## fname ## _8x8_ ## cpu; \
109
- p.cu[BLOCK_16x16].prim = fncdef x265_ ## fname ## _16x16_ ## cpu; \
110
- p.cu[BLOCK_32x32].prim = fncdef x265_ ## fname ## _32x32_ ## cpu
111
+ p.cu[BLOCK_4x4].prim = fncdef PFX(fname ## _4x4_ ## cpu); \
112
+ p.cu[BLOCK_8x8].prim = fncdef PFX(fname ## _8x8_ ## cpu); \
113
+ p.cu[BLOCK_16x16].prim = fncdef PFX(fname ## _16x16_ ## cpu); \
114
+ p.cu[BLOCK_32x32].prim = fncdef PFX(fname ## _32x32_ ## cpu)
115
#define ALL_LUMA_TU_TYPED_S(prim, fncdef, fname, cpu) \
116
- p.cu[BLOCK_4x4].prim = fncdef x265_ ## fname ## 4_ ## cpu; \
117
- p.cu[BLOCK_8x8].prim = fncdef x265_ ## fname ## 8_ ## cpu; \
118
- p.cu[BLOCK_16x16].prim = fncdef x265_ ## fname ## 16_ ## cpu; \
119
- p.cu[BLOCK_32x32].prim = fncdef x265_ ## fname ## 32_ ## cpu
120
+ p.cu[BLOCK_4x4].prim = fncdef PFX(fname ## 4_ ## cpu); \
121
+ p.cu[BLOCK_8x8].prim = fncdef PFX(fname ## 8_ ## cpu); \
122
+ p.cu[BLOCK_16x16].prim = fncdef PFX(fname ## 16_ ## cpu); \
123
+ p.cu[BLOCK_32x32].prim = fncdef PFX(fname ## 32_ ## cpu)
124
#define ALL_LUMA_BLOCKS_TYPED(prim, fncdef, fname, cpu) \
125
- p.cu[BLOCK_4x4].prim = fncdef x265_ ## fname ## _4x4_ ## cpu; \
126
- p.cu[BLOCK_8x8].prim = fncdef x265_ ## fname ## _8x8_ ## cpu; \
127
- p.cu[BLOCK_16x16].prim = fncdef x265_ ## fname ## _16x16_ ## cpu; \
128
- p.cu[BLOCK_32x32].prim = fncdef x265_ ## fname ## _32x32_ ## cpu; \
129
- p.cu[BLOCK_64x64].prim = fncdef x265_ ## fname ## _64x64_ ## cpu;
130
+ p.cu[BLOCK_4x4].prim = fncdef PFX(fname ## _4x4_ ## cpu); \
131
+ p.cu[BLOCK_8x8].prim = fncdef PFX(fname ## _8x8_ ## cpu); \
132
+ p.cu[BLOCK_16x16].prim = fncdef PFX(fname ## _16x16_ ## cpu); \
133
+ p.cu[BLOCK_32x32].prim = fncdef PFX(fname ## _32x32_ ## cpu); \
134
+ p.cu[BLOCK_64x64].prim = fncdef PFX(fname ## _64x64_ ## cpu);
135
#define ALL_LUMA_CU(prim, fname, cpu) ALL_LUMA_CU_TYPED(prim, , fname, cpu)
136
#define ALL_LUMA_CU_S(prim, fname, cpu) ALL_LUMA_CU_TYPED_S(prim, , fname, cpu)
137
#define ALL_LUMA_TU(prim, fname, cpu) ALL_LUMA_TU_TYPED(prim, , fname, cpu)
138
139
#define ALL_LUMA_TU_S(prim, fname, cpu) ALL_LUMA_TU_TYPED_S(prim, , fname, cpu)
140
141
#define ALL_LUMA_PU_TYPED(prim, fncdef, fname, cpu) \
142
- p.pu[LUMA_8x8].prim = fncdef x265_ ## fname ## _8x8_ ## cpu; \
143
- p.pu[LUMA_16x16].prim = fncdef x265_ ## fname ## _16x16_ ## cpu; \
144
- p.pu[LUMA_32x32].prim = fncdef x265_ ## fname ## _32x32_ ## cpu; \
145
- p.pu[LUMA_64x64].prim = fncdef x265_ ## fname ## _64x64_ ## cpu; \
146
- p.pu[LUMA_8x4].prim = fncdef x265_ ## fname ## _8x4_ ## cpu; \
147
- p.pu[LUMA_4x8].prim = fncdef x265_ ## fname ## _4x8_ ## cpu; \
148
- p.pu[LUMA_16x8].prim = fncdef x265_ ## fname ## _16x8_ ## cpu; \
149
- p.pu[LUMA_8x16].prim = fncdef x265_ ## fname ## _8x16_ ## cpu; \
150
- p.pu[LUMA_16x32].prim = fncdef x265_ ## fname ## _16x32_ ## cpu; \
151
- p.pu[LUMA_32x16].prim = fncdef x265_ ## fname ## _32x16_ ## cpu; \
152
- p.pu[LUMA_64x32].prim = fncdef x265_ ## fname ## _64x32_ ## cpu; \
153
- p.pu[LUMA_32x64].prim = fncdef x265_ ## fname ## _32x64_ ## cpu; \
154
- p.pu[LUMA_16x12].prim = fncdef x265_ ## fname ## _16x12_ ## cpu; \
155
- p.pu[LUMA_12x16].prim = fncdef x265_ ## fname ## _12x16_ ## cpu; \
156
- p.pu[LUMA_16x4].prim = fncdef x265_ ## fname ## _16x4_ ## cpu; \
157
- p.pu[LUMA_4x16].prim = fncdef x265_ ## fname ## _4x16_ ## cpu; \
158
- p.pu[LUMA_32x24].prim = fncdef x265_ ## fname ## _32x24_ ## cpu; \
159
- p.pu[LUMA_24x32].prim = fncdef x265_ ## fname ## _24x32_ ## cpu; \
160
- p.pu[LUMA_32x8].prim = fncdef x265_ ## fname ## _32x8_ ## cpu; \
161
- p.pu[LUMA_8x32].prim = fncdef x265_ ## fname ## _8x32_ ## cpu; \
162
- p.pu[LUMA_64x48].prim = fncdef x265_ ## fname ## _64x48_ ## cpu; \
163
- p.pu[LUMA_48x64].prim = fncdef x265_ ## fname ## _48x64_ ## cpu; \
164
- p.pu[LUMA_64x16].prim = fncdef x265_ ## fname ## _64x16_ ## cpu; \
165
- p.pu[LUMA_16x64].prim = fncdef x265_ ## fname ## _16x64_ ## cpu
166
+ p.pu[LUMA_8x8].prim = fncdef PFX(fname ## _8x8_ ## cpu); \
167
+ p.pu[LUMA_16x16].prim = fncdef PFX(fname ## _16x16_ ## cpu); \
168
+ p.pu[LUMA_32x32].prim = fncdef PFX(fname ## _32x32_ ## cpu); \
169
+ p.pu[LUMA_64x64].prim = fncdef PFX(fname ## _64x64_ ## cpu); \
170
+ p.pu[LUMA_8x4].prim = fncdef PFX(fname ## _8x4_ ## cpu); \
171
+ p.pu[LUMA_4x8].prim = fncdef PFX(fname ## _4x8_ ## cpu); \
172
+ p.pu[LUMA_16x8].prim = fncdef PFX(fname ## _16x8_ ## cpu); \
173
+ p.pu[LUMA_8x16].prim = fncdef PFX(fname ## _8x16_ ## cpu); \
174
+ p.pu[LUMA_16x32].prim = fncdef PFX(fname ## _16x32_ ## cpu); \
175
+ p.pu[LUMA_32x16].prim = fncdef PFX(fname ## _32x16_ ## cpu); \
176
+ p.pu[LUMA_64x32].prim = fncdef PFX(fname ## _64x32_ ## cpu); \
177
+ p.pu[LUMA_32x64].prim = fncdef PFX(fname ## _32x64_ ## cpu); \
178
+ p.pu[LUMA_16x12].prim = fncdef PFX(fname ## _16x12_ ## cpu); \
179
+ p.pu[LUMA_12x16].prim = fncdef PFX(fname ## _12x16_ ## cpu); \
180
+ p.pu[LUMA_16x4].prim = fncdef PFX(fname ## _16x4_ ## cpu); \
181
+ p.pu[LUMA_4x16].prim = fncdef PFX(fname ## _4x16_ ## cpu); \
182
+ p.pu[LUMA_32x24].prim = fncdef PFX(fname ## _32x24_ ## cpu); \
183
+ p.pu[LUMA_24x32].prim = fncdef PFX(fname ## _24x32_ ## cpu); \
184
+ p.pu[LUMA_32x8].prim = fncdef PFX(fname ## _32x8_ ## cpu); \
185
+ p.pu[LUMA_8x32].prim = fncdef PFX(fname ## _8x32_ ## cpu); \
186
+ p.pu[LUMA_64x48].prim = fncdef PFX(fname ## _64x48_ ## cpu); \
187
+ p.pu[LUMA_48x64].prim = fncdef PFX(fname ## _48x64_ ## cpu); \
188
+ p.pu[LUMA_64x16].prim = fncdef PFX(fname ## _64x16_ ## cpu); \
189
+ p.pu[LUMA_16x64].prim = fncdef PFX(fname ## _16x64_ ## cpu)
190
#define ALL_LUMA_PU(prim, fname, cpu) ALL_LUMA_PU_TYPED(prim, , fname, cpu)
191
192
#define ALL_LUMA_PU_T(prim, fname) \
193
194
p.pu[LUMA_16x64].prim = fname<LUMA_16x64>
195
196
#define ALL_CHROMA_420_CU_TYPED(prim, fncdef, fname, cpu) \
197
- p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].prim = fncdef x265_ ## fname ## _4x4_ ## cpu; \
198
- p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].prim = fncdef x265_ ## fname ## _8x8_ ## cpu; \
199
- p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].prim = fncdef x265_ ## fname ## _16x16_ ## cpu; \
200
- p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].prim = fncdef x265_ ## fname ## _32x32_ ## cpu
201
x265_1.7.tar.gz/source/common/x86/blockcopy8.asm -> x265_1.8.tar.gz/source/common/x86/blockcopy8.asm
Changed
175
1
2
;-----------------------------------------------------------------------------
3
%macro BLOCKCOPY_PS_W32_H4_avx2 2
4
INIT_YMM avx2
5
-cglobal blockcopy_ps_%1x%2, 4, 7, 3
6
+cglobal blockcopy_ps_%1x%2, 4, 7, 2
7
add r1, r1
8
mov r4d, %2/4
9
lea r5, [3 * r3]
10
lea r6, [3 * r1]
11
- pxor m0, m0
12
-
13
.loop:
14
- movu m1, [r2]
15
- punpcklbw m2, m1, m0
16
- punpckhbw m1, m1, m0
17
- vperm2i128 m3, m2, m1, 00100000b
18
- vperm2i128 m2, m2, m1, 00110001b
19
- movu [r0], m3
20
- movu [r0 + 32], m2
21
- movu m1, [r2 + r3]
22
- punpcklbw m2, m1, m0
23
- punpckhbw m1, m1, m0
24
- vperm2i128 m3, m2, m1, 00100000b
25
- vperm2i128 m2, m2, m1, 00110001b
26
- movu [r0 + r1], m3
27
- movu [r0 + r1 + 32], m2
28
- movu m1, [r2 + 2 * r3]
29
- punpcklbw m2, m1, m0
30
- punpckhbw m1, m1, m0
31
- vperm2i128 m3, m2, m1, 00100000b
32
- vperm2i128 m2, m2, m1, 00110001b
33
- movu [r0 + 2 * r1], m3
34
- movu [r0 + 2 * r1 + 32], m2
35
- movu m1, [r2 + r5]
36
- punpcklbw m2, m1, m0
37
- punpckhbw m1, m1, m0
38
- vperm2i128 m3, m2, m1, 00100000b
39
- vperm2i128 m2, m2, m1, 00110001b
40
- movu [r0 + r6], m3
41
- movu [r0 + r6 + 32], m2
42
-
43
+ pmovzxbw m0, [r2 + 0]
44
+ pmovzxbw m1, [r2 + 16]
45
+ movu [r0 + 0], m0
46
+ movu [r0 + 32], m1
47
+
48
+ pmovzxbw m0, [r2 + r3 + 0]
49
+ pmovzxbw m1, [r2 + r3 + 16]
50
+ movu [r0 + r1 + 0], m0
51
+ movu [r0 + r1 + 32], m1
52
+
53
+ pmovzxbw m0, [r2 + r3 * 2 + 0]
54
+ pmovzxbw m1, [r2 + r3 * 2 + 16]
55
+ movu [r0 + r1 * 2 + 0], m0
56
+ movu [r0 + r1 * 2 + 32], m1
57
+
58
+ pmovzxbw m0, [r2 + r5 + 0]
59
+ pmovzxbw m1, [r2 + r5 + 16]
60
+ movu [r0 + r6 + 0], m0
61
+ movu [r0 + r6 + 32], m1
62
lea r0, [r0 + 4 * r1]
63
lea r2, [r2 + 4 * r3]
64
dec r4d
65
66
INIT_YMM avx2
67
cglobal blockcopy_ps_64x64, 4, 7, 4
68
add r1, r1
69
- mov r4d, 64/4
70
+ mov r4d, 64/8
71
lea r5, [3 * r3]
72
lea r6, [3 * r1]
73
- pxor m0, m0
74
-
75
.loop:
76
- movu m1, [r2]
77
- punpcklbw m2, m1, m0
78
- punpckhbw m1, m1, m0
79
- vperm2i128 m3, m2, m1, 00100000b
80
- vperm2i128 m2, m2, m1, 00110001b
81
- movu [r0], m3
82
- movu [r0 + 32], m2
83
- movu m1, [r2 + 32]
84
- punpcklbw m2, m1, m0
85
- punpckhbw m1, m1, m0
86
- vperm2i128 m3, m2, m1, 00100000b
87
- vperm2i128 m2, m2, m1, 00110001b
88
- movu [r0 + 64], m3
89
- movu [r0 + 96], m2
90
- movu m1, [r2 + r3]
91
- punpcklbw m2, m1, m0
92
- punpckhbw m1, m1, m0
93
- vperm2i128 m3, m2, m1, 00100000b
94
- vperm2i128 m2, m2, m1, 00110001b
95
- movu [r0 + r1], m3
96
- movu [r0 + r1 + 32], m2
97
- movu m1, [r2 + r3 + 32]
98
- punpcklbw m2, m1, m0
99
- punpckhbw m1, m1, m0
100
- vperm2i128 m3, m2, m1, 00100000b
101
- vperm2i128 m2, m2, m1, 00110001b
102
- movu [r0 + r1 + 64], m3
103
- movu [r0 + r1 + 96], m2
104
- movu m1, [r2 + 2 * r3]
105
- punpcklbw m2, m1, m0
106
- punpckhbw m1, m1, m0
107
- vperm2i128 m3, m2, m1, 00100000b
108
- vperm2i128 m2, m2, m1, 00110001b
109
- movu [r0 + 2 * r1], m3
110
- movu [r0 + 2 * r1 + 32], m2
111
- movu m1, [r2 + 2 * r3 + 32]
112
- punpcklbw m2, m1, m0
113
- punpckhbw m1, m1, m0
114
- vperm2i128 m3, m2, m1, 00100000b
115
- vperm2i128 m2, m2, m1, 00110001b
116
- movu [r0 + 2 * r1 + 64], m3
117
- movu [r0 + 2 * r1 + 96], m2
118
- movu m1, [r2 + r5]
119
- punpcklbw m2, m1, m0
120
- punpckhbw m1, m1, m0
121
- vperm2i128 m3, m2, m1, 00100000b
122
- vperm2i128 m2, m2, m1, 00110001b
123
- movu [r0 + r6], m3
124
- movu [r0 + r6 + 32], m2
125
- movu m1, [r2 + r5 + 32]
126
- punpcklbw m2, m1, m0
127
- punpckhbw m1, m1, m0
128
- vperm2i128 m3, m2, m1, 00100000b
129
- vperm2i128 m2, m2, m1, 00110001b
130
- movu [r0 + r6 + 64], m3
131
- movu [r0 + r6 + 96], m2
132
-
133
+%rep 2
134
+ pmovzxbw m0, [r2 + 0]
135
+ pmovzxbw m1, [r2 + 16]
136
+ pmovzxbw m2, [r2 + 32]
137
+ pmovzxbw m3, [r2 + 48]
138
+ movu [r0 + 0], m0
139
+ movu [r0 + 32], m1
140
+ movu [r0 + 64], m2
141
+ movu [r0 + 96], m3
142
+
143
+ pmovzxbw m0, [r2 + r3 + 0]
144
+ pmovzxbw m1, [r2 + r3 + 16]
145
+ pmovzxbw m2, [r2 + r3 + 32]
146
+ pmovzxbw m3, [r2 + r3 + 48]
147
+ movu [r0 + r1 + 0], m0
148
+ movu [r0 + r1 + 32], m1
149
+ movu [r0 + r1 + 64], m2
150
+ movu [r0 + r1 + 96], m3
151
+
152
+ pmovzxbw m0, [r2 + r3 * 2 + 0]
153
+ pmovzxbw m1, [r2 + r3 * 2 + 16]
154
+ pmovzxbw m2, [r2 + r3 * 2 + 32]
155
+ pmovzxbw m3, [r2 + r3 * 2 + 48]
156
+ movu [r0 + r1 * 2 + 0], m0
157
+ movu [r0 + r1 * 2 + 32], m1
158
+ movu [r0 + r1 * 2 + 64], m2
159
+ movu [r0 + r1 * 2 + 96], m3
160
+
161
+ pmovzxbw m0, [r2 + r5 + 0]
162
+ pmovzxbw m1, [r2 + r5 + 16]
163
+ pmovzxbw m2, [r2 + r5 + 32]
164
+ pmovzxbw m3, [r2 + r5 + 48]
165
+ movu [r0 + r6 + 0], m0
166
+ movu [r0 + r6 + 32], m1
167
+ movu [r0 + r6 + 64], m2
168
+ movu [r0 + r6 + 96], m3
169
lea r0, [r0 + 4 * r1]
170
lea r2, [r2 + 4 * r3]
171
+%endrep
172
dec r4d
173
jnz .loop
174
RET
175
x265_1.7.tar.gz/source/common/x86/blockcopy8.h -> x265_1.8.tar.gz/source/common/x86/blockcopy8.h
Changed
201
1
2
#ifndef X265_BLOCKCOPY8_H
3
#define X265_BLOCKCOPY8_H
4
5
-void x265_cpy2Dto1D_shl_4_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
6
-void x265_cpy2Dto1D_shl_8_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
7
-void x265_cpy2Dto1D_shl_16_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
8
-void x265_cpy2Dto1D_shl_32_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
9
-void x265_cpy2Dto1D_shr_4_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
10
-void x265_cpy2Dto1D_shr_8_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
11
-void x265_cpy2Dto1D_shr_16_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
12
-void x265_cpy2Dto1D_shr_32_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
13
-void x265_cpy1Dto2D_shl_4_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
14
-void x265_cpy1Dto2D_shl_8_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
15
-void x265_cpy1Dto2D_shl_16_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
16
-void x265_cpy1Dto2D_shl_32_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
17
-void x265_cpy1Dto2D_shl_4_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
18
-void x265_cpy1Dto2D_shl_8_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
19
-void x265_cpy1Dto2D_shl_16_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
20
-void x265_cpy1Dto2D_shl_32_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
21
-void x265_cpy1Dto2D_shr_4_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
22
-void x265_cpy1Dto2D_shr_8_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
23
-void x265_cpy1Dto2D_shr_16_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
24
-void x265_cpy1Dto2D_shr_32_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
25
-void x265_cpy1Dto2D_shr_4_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
26
-void x265_cpy1Dto2D_shr_8_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
27
-void x265_cpy1Dto2D_shr_16_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
28
-void x265_cpy1Dto2D_shr_32_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift);
29
-void x265_cpy2Dto1D_shl_8_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
30
-void x265_cpy2Dto1D_shl_16_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
31
-void x265_cpy2Dto1D_shl_32_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
32
-void x265_cpy2Dto1D_shr_8_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
33
-void x265_cpy2Dto1D_shr_16_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
34
-void x265_cpy2Dto1D_shr_32_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
35
-uint32_t x265_copy_cnt_4_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride);
36
-uint32_t x265_copy_cnt_8_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride);
37
-uint32_t x265_copy_cnt_16_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride);
38
-uint32_t x265_copy_cnt_32_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride);
39
-uint32_t x265_copy_cnt_4_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride);
40
-uint32_t x265_copy_cnt_8_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride);
41
-uint32_t x265_copy_cnt_16_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride);
42
-uint32_t x265_copy_cnt_32_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride);
43
+FUNCDEF_TU_S(void, cpy2Dto1D_shl, sse2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
44
+FUNCDEF_TU_S(void, cpy2Dto1D_shl, sse4, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
45
+FUNCDEF_TU_S(void, cpy2Dto1D_shl, avx2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
46
47
-#define SETUP_BLOCKCOPY_FUNC(W, H, cpu) \
48
- void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); \
49
- void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb); \
50
- void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb);
51
+FUNCDEF_TU_S(void, cpy2Dto1D_shr, sse2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
52
+FUNCDEF_TU_S(void, cpy2Dto1D_shr, sse4, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
53
+FUNCDEF_TU_S(void, cpy2Dto1D_shr, avx2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
54
55
-#define SETUP_BLOCKCOPY_PS(W, H, cpu) \
56
- void x265_blockcopy_ps_ ## W ## x ## H ## cpu(int16_t* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
57
+FUNCDEF_TU_S(void, cpy1Dto2D_shl, sse2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
58
+FUNCDEF_TU_S(void, cpy1Dto2D_shl, sse4, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
59
+FUNCDEF_TU_S(void, cpy1Dto2D_shl, avx2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
60
61
-#define SETUP_BLOCKCOPY_SP(W, H, cpu) \
62
- void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb);
63
+FUNCDEF_TU_S(void, cpy1Dto2D_shr, sse2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
64
+FUNCDEF_TU_S(void, cpy1Dto2D_shr, sse4, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
65
+FUNCDEF_TU_S(void, cpy1Dto2D_shr, avx2, int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
66
67
-#define SETUP_BLOCKCOPY_SS_PP(W, H, cpu) \
68
- void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); \
69
- void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb);
70
+FUNCDEF_TU_S(uint32_t, copy_cnt, sse2, int16_t* dst, const int16_t* src, intptr_t srcStride);
71
+FUNCDEF_TU_S(uint32_t, copy_cnt, sse4, int16_t* dst, const int16_t* src, intptr_t srcStride);
72
+FUNCDEF_TU_S(uint32_t, copy_cnt, avx2, int16_t* dst, const int16_t* src, intptr_t srcStride);
73
74
-#define BLOCKCOPY_COMMON(cpu) \
75
- SETUP_BLOCKCOPY_FUNC(4, 4, cpu); \
76
- SETUP_BLOCKCOPY_FUNC(4, 2, cpu); \
77
- SETUP_BLOCKCOPY_FUNC(8, 8, cpu); \
78
- SETUP_BLOCKCOPY_FUNC(8, 4, cpu); \
79
- SETUP_BLOCKCOPY_FUNC(4, 8, cpu); \
80
- SETUP_BLOCKCOPY_FUNC(8, 6, cpu); \
81
- SETUP_BLOCKCOPY_FUNC(8, 2, cpu); \
82
- SETUP_BLOCKCOPY_FUNC(16, 16, cpu); \
83
- SETUP_BLOCKCOPY_FUNC(16, 8, cpu); \
84
- SETUP_BLOCKCOPY_FUNC(8, 16, cpu); \
85
- SETUP_BLOCKCOPY_FUNC(16, 12, cpu); \
86
- SETUP_BLOCKCOPY_FUNC(12, 16, cpu); \
87
- SETUP_BLOCKCOPY_FUNC(16, 4, cpu); \
88
- SETUP_BLOCKCOPY_FUNC(4, 16, cpu); \
89
- SETUP_BLOCKCOPY_FUNC(32, 32, cpu); \
90
- SETUP_BLOCKCOPY_FUNC(32, 16, cpu); \
91
- SETUP_BLOCKCOPY_FUNC(16, 32, cpu); \
92
- SETUP_BLOCKCOPY_FUNC(32, 24, cpu); \
93
- SETUP_BLOCKCOPY_FUNC(24, 32, cpu); \
94
- SETUP_BLOCKCOPY_FUNC(32, 8, cpu); \
95
- SETUP_BLOCKCOPY_FUNC(8, 32, cpu); \
96
- SETUP_BLOCKCOPY_FUNC(64, 64, cpu); \
97
- SETUP_BLOCKCOPY_FUNC(64, 32, cpu); \
98
- SETUP_BLOCKCOPY_FUNC(32, 64, cpu); \
99
- SETUP_BLOCKCOPY_FUNC(64, 48, cpu); \
100
- SETUP_BLOCKCOPY_FUNC(48, 64, cpu); \
101
- SETUP_BLOCKCOPY_FUNC(64, 16, cpu); \
102
- SETUP_BLOCKCOPY_FUNC(16, 64, cpu);
103
+FUNCDEF_TU(void, blockfill_s, sse2, int16_t* dst, intptr_t dstride, int16_t val);
104
+FUNCDEF_TU(void, blockfill_s, avx2, int16_t* dst, intptr_t dstride, int16_t val);
105
106
-#define BLOCKCOPY_SP(cpu) \
107
- SETUP_BLOCKCOPY_SP(2, 4, cpu); \
108
- SETUP_BLOCKCOPY_SP(2, 8, cpu); \
109
- SETUP_BLOCKCOPY_SP(6, 8, cpu); \
110
- \
111
- SETUP_BLOCKCOPY_SP(2, 16, cpu); \
112
- SETUP_BLOCKCOPY_SP(4, 32, cpu); \
113
- SETUP_BLOCKCOPY_SP(6, 16, cpu); \
114
- SETUP_BLOCKCOPY_SP(8, 12, cpu); \
115
- SETUP_BLOCKCOPY_SP(8, 64, cpu); \
116
- SETUP_BLOCKCOPY_SP(12, 32, cpu); \
117
- SETUP_BLOCKCOPY_SP(16, 24, cpu); \
118
- SETUP_BLOCKCOPY_SP(24, 64, cpu); \
119
- SETUP_BLOCKCOPY_SP(32, 48, cpu);
120
+FUNCDEF_CHROMA_PU(void, blockcopy_ss, sse2, int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
121
+FUNCDEF_CHROMA_PU(void, blockcopy_ss, avx, int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
122
123
-#define BLOCKCOPY_SS_PP(cpu) \
124
- SETUP_BLOCKCOPY_SS_PP(2, 4, cpu); \
125
- SETUP_BLOCKCOPY_SS_PP(2, 8, cpu); \
126
- SETUP_BLOCKCOPY_SS_PP(6, 8, cpu); \
127
- \
128
- SETUP_BLOCKCOPY_SS_PP(2, 16, cpu); \
129
- SETUP_BLOCKCOPY_SS_PP(4, 32, cpu); \
130
- SETUP_BLOCKCOPY_SS_PP(6, 16, cpu); \
131
- SETUP_BLOCKCOPY_SS_PP(8, 12, cpu); \
132
- SETUP_BLOCKCOPY_SS_PP(8, 64, cpu); \
133
- SETUP_BLOCKCOPY_SS_PP(12, 32, cpu); \
134
- SETUP_BLOCKCOPY_SS_PP(16, 24, cpu); \
135
- SETUP_BLOCKCOPY_SS_PP(24, 64, cpu); \
136
- SETUP_BLOCKCOPY_SS_PP(32, 48, cpu);
137
-
138
+FUNCDEF_CHROMA_PU(void, blockcopy_pp, sse2, pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
139
+FUNCDEF_CHROMA_PU(void, blockcopy_pp, avx, pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
140
141
-#define BLOCKCOPY_PS(cpu) \
142
- SETUP_BLOCKCOPY_PS(2, 4, cpu); \
143
- SETUP_BLOCKCOPY_PS(2, 8, cpu); \
144
- SETUP_BLOCKCOPY_PS(4, 2, cpu); \
145
- SETUP_BLOCKCOPY_PS(4, 4, cpu); \
146
- SETUP_BLOCKCOPY_PS(4, 8, cpu); \
147
- SETUP_BLOCKCOPY_PS(4, 16, cpu); \
148
- SETUP_BLOCKCOPY_PS(6, 8, cpu); \
149
- SETUP_BLOCKCOPY_PS(8, 2, cpu); \
150
- SETUP_BLOCKCOPY_PS(8, 4, cpu); \
151
- SETUP_BLOCKCOPY_PS(8, 6, cpu); \
152
- SETUP_BLOCKCOPY_PS(8, 8, cpu); \
153
- SETUP_BLOCKCOPY_PS(8, 16, cpu); \
154
- SETUP_BLOCKCOPY_PS(8, 32, cpu); \
155
- SETUP_BLOCKCOPY_PS(12, 16, cpu); \
156
- SETUP_BLOCKCOPY_PS(16, 4, cpu); \
157
- SETUP_BLOCKCOPY_PS(16, 8, cpu); \
158
- SETUP_BLOCKCOPY_PS(16, 12, cpu); \
159
- SETUP_BLOCKCOPY_PS(16, 16, cpu); \
160
- SETUP_BLOCKCOPY_PS(16, 32, cpu); \
161
- SETUP_BLOCKCOPY_PS(24, 32, cpu); \
162
- SETUP_BLOCKCOPY_PS(32, 8, cpu); \
163
- SETUP_BLOCKCOPY_PS(32, 16, cpu); \
164
- SETUP_BLOCKCOPY_PS(32, 24, cpu); \
165
- SETUP_BLOCKCOPY_PS(32, 32, cpu); \
166
- SETUP_BLOCKCOPY_PS(16, 64, cpu); \
167
- SETUP_BLOCKCOPY_PS(32, 64, cpu); \
168
- SETUP_BLOCKCOPY_PS(48, 64, cpu); \
169
- SETUP_BLOCKCOPY_PS(64, 16, cpu); \
170
- SETUP_BLOCKCOPY_PS(64, 32, cpu); \
171
- SETUP_BLOCKCOPY_PS(64, 48, cpu); \
172
- SETUP_BLOCKCOPY_PS(64, 64, cpu); \
173
- \
174
- SETUP_BLOCKCOPY_PS(2, 16, cpu); \
175
- SETUP_BLOCKCOPY_PS(4, 32, cpu); \
176
- SETUP_BLOCKCOPY_PS(6, 16, cpu); \
177
- SETUP_BLOCKCOPY_PS(8, 12, cpu); \
178
- SETUP_BLOCKCOPY_PS(8, 64, cpu); \
179
- SETUP_BLOCKCOPY_PS(12, 32, cpu); \
180
- SETUP_BLOCKCOPY_PS(16, 24, cpu); \
181
- SETUP_BLOCKCOPY_PS(24, 64, cpu); \
182
- SETUP_BLOCKCOPY_PS(32, 48, cpu);
183
-
184
-BLOCKCOPY_COMMON(_sse2);
185
-BLOCKCOPY_SS_PP(_sse2);
186
-BLOCKCOPY_SP(_sse4);
187
-BLOCKCOPY_PS(_sse4);
188
-
189
-BLOCKCOPY_SP(_sse2);
190
-
191
-void x265_blockfill_s_4x4_sse2(int16_t* dst, intptr_t dstride, int16_t val);
192
-void x265_blockfill_s_8x8_sse2(int16_t* dst, intptr_t dstride, int16_t val);
193
-void x265_blockfill_s_16x16_sse2(int16_t* dst, intptr_t dstride, int16_t val);
194
-void x265_blockfill_s_32x32_sse2(int16_t* dst, intptr_t dstride, int16_t val);
195
-void x265_blockcopy_ss_16x4_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
196
-void x265_blockcopy_ss_16x8_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
197
-void x265_blockcopy_ss_16x12_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
198
-void x265_blockcopy_ss_16x16_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
199
-void x265_blockcopy_ss_16x24_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
200
-void x265_blockcopy_ss_16x32_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
201
x265_1.7.tar.gz/source/common/x86/const-a.asm -> x265_1.8.tar.gz/source/common/x86/const-a.asm
Changed
86
1
2
const pb_16, times 32 db 16
3
const pb_32, times 32 db 32
4
const pb_64, times 32 db 64
5
-const pb_128, times 16 db 128
6
+const pb_128, times 32 db 128
7
const pb_a1, times 16 db 0xa1
8
9
const pb_01, times 8 db 0, 1
10
11
;; 16-bit constants
12
13
const pw_1, times 16 dw 1
14
-const pw_2, times 8 dw 2
15
+const pw_2, times 16 dw 2
16
+const pw_3, times 16 dw 3
17
+const pw_7, times 16 dw 7
18
const pw_m2, times 8 dw -2
19
const pw_4, times 8 dw 4
20
const pw_8, times 8 dw 8
21
22
const pw_256, times 16 dw 256
23
const pw_257, times 16 dw 257
24
const pw_512, times 16 dw 512
25
-const pw_1023, times 8 dw 1023
26
+const pw_1023, times 16 dw 1023
27
const pw_1024, times 16 dw 1024
28
+const pw_2048, times 16 dw 2048
29
const pw_4096, times 16 dw 4096
30
+const pw_8192, times 8 dw 8192
31
const pw_00ff, times 16 dw 0x00ff
32
const pw_ff00, times 8 dw 0xff00
33
const pw_2000, times 16 dw 0x2000
34
35
const pw_0_15, times 2 dw 0, 1, 2, 3, 4, 5, 6, 7
36
const pw_ppppmmmm, times 1 dw 1, 1, 1, 1, -1, -1, -1, -1
37
const pw_ppmmppmm, times 1 dw 1, 1, -1, -1, 1, 1, -1, -1
38
-const pw_pmpmpmpm, times 1 dw 1, -1, 1, -1, 1, -1, 1, -1
39
+const pw_pmpmpmpm, times 16 dw 1, -1, 1, -1, 1, -1, 1, -1
40
const pw_pmmpzzzz, times 1 dw 1, -1, -1, 1, 0, 0, 0, 0
41
const multi_2Row, times 1 dw 1, 2, 3, 4, 1, 2, 3, 4
42
const multiH, times 1 dw 9, 10, 11, 12, 13, 14, 15, 16
43
44
const pw_planar16_mul, times 1 dw 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
45
const pw_planar32_mul, times 1 dw 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16
46
const pw_FFFFFFFFFFFFFFF0, dw 0x00
47
- times 7 dw 0xff
48
+ times 7 dw 0xff
49
+const hmul_16p, times 16 db 1
50
+ times 8 db 1, -1
51
52
53
;; 32-bit constants
54
55
const pd_2, times 8 dd 2
56
const pd_4, times 4 dd 4
57
const pd_8, times 4 dd 8
58
-const pd_16, times 4 dd 16
59
-const pd_32, times 4 dd 32
60
+const pd_16, times 8 dd 16
61
+const pd_31, times 4 dd 31
62
+const pd_32, times 8 dd 32
63
const pd_64, times 4 dd 64
64
const pd_128, times 4 dd 128
65
const pd_256, times 4 dd 256
66
67
const pd_2048, times 4 dd 2048
68
const pd_ffff, times 4 dd 0xffff
69
const pd_32767, times 4 dd 32767
70
-const pd_n32768, times 4 dd 0xffff8000
71
+const pd_524416, times 4 dd 524416
72
+const pd_n32768, times 8 dd 0xffff8000
73
+const pd_n131072, times 4 dd 0xfffe0000
74
75
const trans8_shuf, times 1 dd 0, 4, 1, 5, 2, 6, 3, 7
76
-const deinterleave_shufd, times 1 dd 0, 4, 1, 5, 2, 6, 3, 7
77
78
const popcnt_table
79
%assign x 0
80
81
db ((x>>0)&1)+((x>>1)&1)+((x>>2)&1)+((x>>3)&1)+((x>>4)&1)+((x>>5)&1)+((x>>6)&1)+((x>>7)&1)
82
%assign x x+1
83
%endrep
84
-
85
-const sw_64, dd 64
86
x265_1.7.tar.gz/source/common/x86/dct8.asm -> x265_1.8.tar.gz/source/common/x86/dct8.asm
Changed
201
1
2
3
idct8_shuf1: dd 0, 2, 4, 6, 1, 3, 5, 7
4
5
-idct8_shuf2: times 2 db 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
6
+const idct8_shuf2, times 2 db 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
7
8
idct8_shuf3: times 2 db 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
9
10
11
cextern pd_2048
12
cextern pw_ppppmmmm
13
cextern trans8_shuf
14
+
15
+
16
+%if BIT_DEPTH == 12
17
+ %define DCT4_SHIFT 5
18
+ %define DCT4_ROUND 16
19
+ %define IDCT_SHIFT 8
20
+ %define IDCT_ROUND 128
21
+ %define DST4_SHIFT 5
22
+ %define DST4_ROUND 16
23
+ %define DCT8_SHIFT1 6
24
+ %define DCT8_ROUND1 32
25
+%elif BIT_DEPTH == 10
26
+ %define DCT4_SHIFT 3
27
+ %define DCT4_ROUND 4
28
+ %define IDCT_SHIFT 10
29
+ %define IDCT_ROUND 512
30
+ %define DST4_SHIFT 3
31
+ %define DST4_ROUND 4
32
+ %define DCT8_SHIFT1 4
33
+ %define DCT8_ROUND1 8
34
+%elif BIT_DEPTH == 8
35
+ %define DCT4_SHIFT 1
36
+ %define DCT4_ROUND 1
37
+ %define IDCT_SHIFT 12
38
+ %define IDCT_ROUND 2048
39
+ %define DST4_SHIFT 1
40
+ %define DST4_ROUND 1
41
+ %define DCT8_SHIFT1 2
42
+ %define DCT8_ROUND1 2
43
+%else
44
+ %error Unsupported BIT_DEPTH!
45
+%endif
46
+
47
+%define DCT8_ROUND2 256
48
+%define DCT8_SHIFT2 9
49
+
50
;------------------------------------------------------
51
;void dct4(const int16_t* src, int16_t* dst, intptr_t srcStride)
52
;------------------------------------------------------
53
INIT_XMM sse2
54
cglobal dct4, 3, 4, 8
55
-%if BIT_DEPTH == 10
56
- %define DCT_SHIFT 3
57
- mova m7, [pd_4]
58
-%elif BIT_DEPTH == 8
59
- %define DCT_SHIFT 1
60
- mova m7, [pd_1]
61
-%else
62
- %error Unsupported BIT_DEPTH!
63
-%endif
64
+ mova m7, [pd_ %+ DCT4_ROUND]
65
add r2d, r2d
66
lea r3, [tab_dct4]
67
68
69
psubw m2, m0
70
pmaddwd m0, m1, m4
71
paddd m0, m7
72
- psrad m0, DCT_SHIFT
73
+ psrad m0, DCT4_SHIFT
74
pmaddwd m3, m2, m5
75
paddd m3, m7
76
- psrad m3, DCT_SHIFT
77
+ psrad m3, DCT4_SHIFT
78
packssdw m0, m3
79
pshufd m0, m0, 0xD8
80
pshufhw m0, m0, 0xB1
81
pmaddwd m1, m6
82
paddd m1, m7
83
- psrad m1, DCT_SHIFT
84
+ psrad m1, DCT4_SHIFT
85
pmaddwd m2, [r3 + 3 * 16]
86
paddd m2, m7
87
- psrad m2, DCT_SHIFT
88
+ psrad m2, DCT4_SHIFT
89
packssdw m1, m2
90
pshufd m1, m1, 0xD8
91
pshufhw m1, m1, 0xB1
92
93
; - r2: source stride
94
INIT_YMM avx2
95
cglobal dct4, 3, 4, 8, src, dst, srcStride
96
-%if BIT_DEPTH == 10
97
- %define DCT_SHIFT 3
98
- vbroadcasti128 m7, [pd_4]
99
-%elif BIT_DEPTH == 8
100
- %define DCT_SHIFT 1
101
- vbroadcasti128 m7, [pd_1]
102
-%else
103
- %error Unsupported BIT_DEPTH!
104
-%endif
105
+ vbroadcasti128 m7, [pd_ %+ DCT4_ROUND]
106
add r2d, r2d
107
lea r3, [avx2_dct4]
108
109
110
111
pmaddwd m2, m5
112
paddd m2, m7
113
- psrad m2, DCT_SHIFT
114
+ psrad m2, DCT4_SHIFT
115
116
pmaddwd m0, m6
117
paddd m0, m7
118
- psrad m0, DCT_SHIFT
119
+ psrad m0, DCT4_SHIFT
120
121
packssdw m2, m0
122
pshufb m2, m4
123
124
;void idct4(const int16_t* src, int16_t* dst, intptr_t dstStride)
125
;-------------------------------------------------------
126
INIT_XMM sse2
127
-cglobal idct4, 3, 4, 7
128
-%if BIT_DEPTH == 8
129
- %define IDCT4_OFFSET [pd_2048]
130
- %define IDCT4_SHIFT 12
131
-%elif BIT_DEPTH == 10
132
- %define IDCT4_OFFSET [pd_512]
133
- %define IDCT4_SHIFT 10
134
-%else
135
- %error Unsupported BIT_DEPTH!
136
-%endif
137
+cglobal idct4, 3, 4, 6
138
add r2d, r2d
139
lea r3, [tab_dct4]
140
141
- mova m6, [pd_64]
142
-
143
movu m0, [r0 + 0 * 16]
144
movu m1, [r0 + 1 * 16]
145
146
punpcklwd m2, m0, m1
147
pmaddwd m3, m2, [r3 + 0 * 16] ; m3 = E1
148
- paddd m3, m6
149
+ paddd m3, [pd_64]
150
151
pmaddwd m2, [r3 + 2 * 16] ; m2 = E2
152
- paddd m2, m6
153
+ paddd m2, [pd_64]
154
155
punpckhwd m0, m1
156
pmaddwd m1, m0, [r3 + 1 * 16] ; m1 = O1
157
158
punpcklwd m0, m1, m4 ; m0 = m128iA
159
punpckhwd m1, m4 ; m1 = m128iD
160
161
- mova m6, IDCT4_OFFSET
162
-
163
punpcklwd m2, m0, m1
164
pmaddwd m3, m2, [r3 + 0 * 16]
165
- paddd m3, m6 ; m3 = E1
166
+ paddd m3, [pd_ %+ IDCT_ROUND] ; m3 = E1
167
168
pmaddwd m2, [r3 + 2 * 16]
169
- paddd m2, m6 ; m2 = E2
170
+ paddd m2, [pd_ %+ IDCT_ROUND] ; m2 = E2
171
172
punpckhwd m0, m1
173
pmaddwd m1, m0, [r3 + 1 * 16] ; m1 = O1
174
pmaddwd m0, [r3 + 3 * 16] ; m0 = O2
175
176
paddd m4, m3, m1
177
- psrad m4, IDCT4_SHIFT ; m4 = m128iA
178
+ psrad m4, IDCT_SHIFT ; m4 = m128iA
179
paddd m5, m2, m0
180
- psrad m5, IDCT4_SHIFT
181
+ psrad m5, IDCT_SHIFT
182
packssdw m4, m5 ; m4 = m128iA
183
184
psubd m2, m0
185
- psrad m2, IDCT4_SHIFT
186
+ psrad m2, IDCT_SHIFT
187
psubd m3, m1
188
- psrad m3, IDCT4_SHIFT
189
+ psrad m3, IDCT_SHIFT
190
packssdw m2, m3 ; m2 = m128iD
191
192
punpcklwd m1, m4, m2
193
194
movlps [r1 + 2 * r2], m1
195
lea r1, [r1 + 2 * r2]
196
movhps [r1 + r2], m1
197
+ RET
198
+
199
+;------------------------------------------------------
200
+;void dst4(const int16_t* src, int16_t* dst, intptr_t srcStride)
201
x265_1.7.tar.gz/source/common/x86/dct8.h -> x265_1.8.tar.gz/source/common/x86/dct8.h
Changed
45
1
2
3
#ifndef X265_DCT8_H
4
#define X265_DCT8_H
5
-void x265_dct4_sse2(const int16_t* src, int16_t* dst, intptr_t srcStride);
6
-void x265_dct8_sse2(const int16_t* src, int16_t* dst, intptr_t srcStride);
7
-void x265_dst4_ssse3(const int16_t* src, int16_t* dst, intptr_t srcStride);
8
-void x265_dst4_avx2(const int16_t* src, int16_t* dst, intptr_t srcStride);
9
-void x265_dct8_sse4(const int16_t* src, int16_t* dst, intptr_t srcStride);
10
-void x265_dct4_avx2(const int16_t* src, int16_t* dst, intptr_t srcStride);
11
-void x265_dct8_avx2(const int16_t* src, int16_t* dst, intptr_t srcStride);
12
-void x265_dct16_avx2(const int16_t* src, int16_t* dst, intptr_t srcStride);
13
-void x265_dct32_avx2(const int16_t* src, int16_t* dst, intptr_t srcStride);
14
15
-void x265_idst4_sse2(const int16_t* src, int16_t* dst, intptr_t dstStride);
16
-void x265_idst4_avx2(const int16_t* src, int16_t* dst, intptr_t dstStride);
17
-void x265_idct4_sse2(const int16_t* src, int16_t* dst, intptr_t dstStride);
18
-void x265_idct4_avx2(const int16_t* src, int16_t* dst, intptr_t dstStride);
19
-void x265_idct8_sse2(const int16_t* src, int16_t* dst, intptr_t dstStride);
20
-void x265_idct8_ssse3(const int16_t* src, int16_t* dst, intptr_t dstStride);
21
-void x265_idct8_avx2(const int16_t* src, int16_t* dst, intptr_t dstStride);
22
-void x265_idct16_avx2(const int16_t* src, int16_t* dst, intptr_t dstStride);
23
-void x265_idct32_avx2(const int16_t* src, int16_t* dst, intptr_t dstStride);
24
+FUNCDEF_TU_S2(void, dct, sse2, const int16_t* src, int16_t* dst, intptr_t srcStride);
25
+FUNCDEF_TU_S2(void, dct, ssse3, const int16_t* src, int16_t* dst, intptr_t srcStride);
26
+FUNCDEF_TU_S2(void, dct, sse4, const int16_t* src, int16_t* dst, intptr_t srcStride);
27
+FUNCDEF_TU_S2(void, dct, avx2, const int16_t* src, int16_t* dst, intptr_t srcStride);
28
29
-void x265_denoise_dct_sse4(int16_t* dct, uint32_t* sum, const uint16_t* offset, int size);
30
-void x265_denoise_dct_avx2(int16_t* dct, uint32_t* sum, const uint16_t* offset, int size);
31
+FUNCDEF_TU_S2(void, idct, sse2, const int16_t* src, int16_t* dst, intptr_t dstStride);
32
+FUNCDEF_TU_S2(void, idct, ssse3, const int16_t* src, int16_t* dst, intptr_t dstStride);
33
+FUNCDEF_TU_S2(void, idct, sse4, const int16_t* src, int16_t* dst, intptr_t dstStride);
34
+FUNCDEF_TU_S2(void, idct, avx2, const int16_t* src, int16_t* dst, intptr_t dstStride);
35
+
36
+void PFX(dst4_ssse3)(const int16_t* src, int16_t* dst, intptr_t srcStride);
37
+void PFX(dst4_sse2)(const int16_t* src, int16_t* dst, intptr_t srcStride);
38
+void PFX(idst4_sse2)(const int16_t* src, int16_t* dst, intptr_t srcStride);
39
+void PFX(dst4_avx2)(const int16_t* src, int16_t* dst, intptr_t srcStride);
40
+void PFX(idst4_avx2)(const int16_t* src, int16_t* dst, intptr_t srcStride);
41
+void PFX(denoise_dct_sse4)(int16_t* dct, uint32_t* sum, const uint16_t* offset, int size);
42
+void PFX(denoise_dct_avx2)(int16_t* dct, uint32_t* sum, const uint16_t* offset, int size);
43
44
#endif // ifndef X265_DCT8_H
45
x265_1.7.tar.gz/source/common/x86/intrapred.h -> x265_1.8.tar.gz/source/common/x86/intrapred.h
Changed
201
1
2
#ifndef X265_INTRAPRED_H
3
#define X265_INTRAPRED_H
4
5
-void x265_intra_pred_dc4_sse2(pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
6
-void x265_intra_pred_dc8_sse2(pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
7
-void x265_intra_pred_dc16_sse2(pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
8
-void x265_intra_pred_dc32_sse2(pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
9
-void x265_intra_pred_dc4_sse4(pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
10
-void x265_intra_pred_dc8_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int filter);
11
-void x265_intra_pred_dc16_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int filter);
12
-void x265_intra_pred_dc32_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int filter);
13
-void x265_intra_pred_dc32_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int filter);
14
-
15
-void x265_intra_pred_planar4_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
16
-void x265_intra_pred_planar8_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
17
-void x265_intra_pred_planar16_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
18
-void x265_intra_pred_planar32_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
19
-void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
20
-void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
21
-void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
22
-void x265_intra_pred_planar32_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
23
-void x265_intra_pred_planar16_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
24
-void x265_intra_pred_planar32_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
25
-
26
#define DECL_ANG(bsize, mode, cpu) \
27
- void x265_intra_pred_ang ## bsize ## _ ## mode ## _ ## cpu(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
28
+ void PFX(intra_pred_ang ## bsize ## _ ## mode ## _ ## cpu)(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
29
+
30
+#define DECL_ANGS(bsize, cpu) \
31
+ DECL_ANG(bsize, 2, cpu); \
32
+ DECL_ANG(bsize, 3, cpu); \
33
+ DECL_ANG(bsize, 4, cpu); \
34
+ DECL_ANG(bsize, 5, cpu); \
35
+ DECL_ANG(bsize, 6, cpu); \
36
+ DECL_ANG(bsize, 7, cpu); \
37
+ DECL_ANG(bsize, 8, cpu); \
38
+ DECL_ANG(bsize, 9, cpu); \
39
+ DECL_ANG(bsize, 10, cpu); \
40
+ DECL_ANG(bsize, 11, cpu); \
41
+ DECL_ANG(bsize, 12, cpu); \
42
+ DECL_ANG(bsize, 13, cpu); \
43
+ DECL_ANG(bsize, 14, cpu); \
44
+ DECL_ANG(bsize, 15, cpu); \
45
+ DECL_ANG(bsize, 16, cpu); \
46
+ DECL_ANG(bsize, 17, cpu); \
47
+ DECL_ANG(bsize, 18, cpu); \
48
+ DECL_ANG(bsize, 19, cpu); \
49
+ DECL_ANG(bsize, 20, cpu); \
50
+ DECL_ANG(bsize, 21, cpu); \
51
+ DECL_ANG(bsize, 22, cpu); \
52
+ DECL_ANG(bsize, 23, cpu); \
53
+ DECL_ANG(bsize, 24, cpu); \
54
+ DECL_ANG(bsize, 25, cpu); \
55
+ DECL_ANG(bsize, 26, cpu); \
56
+ DECL_ANG(bsize, 27, cpu); \
57
+ DECL_ANG(bsize, 28, cpu); \
58
+ DECL_ANG(bsize, 29, cpu); \
59
+ DECL_ANG(bsize, 30, cpu); \
60
+ DECL_ANG(bsize, 31, cpu); \
61
+ DECL_ANG(bsize, 32, cpu); \
62
+ DECL_ANG(bsize, 33, cpu); \
63
+ DECL_ANG(bsize, 34, cpu)
64
65
-DECL_ANG(4, 2, sse2);
66
-DECL_ANG(4, 3, sse2);
67
-DECL_ANG(4, 4, sse2);
68
-DECL_ANG(4, 5, sse2);
69
-DECL_ANG(4, 6, sse2);
70
-DECL_ANG(4, 7, sse2);
71
-DECL_ANG(4, 8, sse2);
72
-DECL_ANG(4, 9, sse2);
73
-DECL_ANG(4, 10, sse2);
74
-DECL_ANG(4, 11, sse2);
75
-DECL_ANG(4, 12, sse2);
76
-DECL_ANG(4, 13, sse2);
77
-DECL_ANG(4, 14, sse2);
78
-DECL_ANG(4, 15, sse2);
79
-DECL_ANG(4, 16, sse2);
80
-DECL_ANG(4, 17, sse2);
81
-DECL_ANG(4, 18, sse2);
82
-DECL_ANG(4, 26, sse2);
83
+#define DECL_ALL(cpu) \
84
+ FUNCDEF_TU(void, all_angs_pred, cpu, pixel *dest, pixel *refPix, pixel *filtPix, int bLuma); \
85
+ FUNCDEF_TU(void, intra_filter, cpu, const pixel *samples, pixel *filtered); \
86
+ DECL_ANGS(4, cpu); \
87
+ DECL_ANGS(8, cpu); \
88
+ DECL_ANGS(16, cpu); \
89
+ DECL_ANGS(32, cpu)
90
91
-DECL_ANG(4, 2, ssse3);
92
-DECL_ANG(4, 3, sse4);
93
-DECL_ANG(4, 4, sse4);
94
-DECL_ANG(4, 5, sse4);
95
-DECL_ANG(4, 6, sse4);
96
-DECL_ANG(4, 7, sse4);
97
-DECL_ANG(4, 8, sse4);
98
-DECL_ANG(4, 9, sse4);
99
-DECL_ANG(4, 10, sse4);
100
-DECL_ANG(4, 11, sse4);
101
-DECL_ANG(4, 12, sse4);
102
-DECL_ANG(4, 13, sse4);
103
-DECL_ANG(4, 14, sse4);
104
-DECL_ANG(4, 15, sse4);
105
-DECL_ANG(4, 16, sse4);
106
-DECL_ANG(4, 17, sse4);
107
-DECL_ANG(4, 18, sse4);
108
-DECL_ANG(4, 26, sse4);
109
-DECL_ANG(8, 2, ssse3);
110
-DECL_ANG(8, 3, sse4);
111
-DECL_ANG(8, 4, sse4);
112
-DECL_ANG(8, 5, sse4);
113
-DECL_ANG(8, 6, sse4);
114
-DECL_ANG(8, 7, sse4);
115
-DECL_ANG(8, 8, sse4);
116
-DECL_ANG(8, 9, sse4);
117
-DECL_ANG(8, 10, sse4);
118
-DECL_ANG(8, 11, sse4);
119
-DECL_ANG(8, 12, sse4);
120
-DECL_ANG(8, 13, sse4);
121
-DECL_ANG(8, 14, sse4);
122
-DECL_ANG(8, 15, sse4);
123
-DECL_ANG(8, 16, sse4);
124
-DECL_ANG(8, 17, sse4);
125
-DECL_ANG(8, 18, sse4);
126
-DECL_ANG(8, 19, sse4);
127
-DECL_ANG(8, 20, sse4);
128
-DECL_ANG(8, 21, sse4);
129
-DECL_ANG(8, 22, sse4);
130
-DECL_ANG(8, 23, sse4);
131
-DECL_ANG(8, 24, sse4);
132
-DECL_ANG(8, 25, sse4);
133
-DECL_ANG(8, 26, sse4);
134
-DECL_ANG(8, 27, sse4);
135
-DECL_ANG(8, 28, sse4);
136
-DECL_ANG(8, 29, sse4);
137
-DECL_ANG(8, 30, sse4);
138
-DECL_ANG(8, 31, sse4);
139
-DECL_ANG(8, 32, sse4);
140
-DECL_ANG(8, 33, sse4);
141
+FUNCDEF_TU_S2(void, intra_pred_dc, sse2, pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
142
+FUNCDEF_TU_S2(void, intra_pred_dc, sse4, pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
143
+FUNCDEF_TU_S2(void, intra_pred_dc, avx2, pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
144
145
-DECL_ANG(16, 2, ssse3);
146
-DECL_ANG(16, 3, sse4);
147
-DECL_ANG(16, 4, sse4);
148
-DECL_ANG(16, 5, sse4);
149
-DECL_ANG(16, 6, sse4);
150
-DECL_ANG(16, 7, sse4);
151
-DECL_ANG(16, 8, sse4);
152
-DECL_ANG(16, 9, sse4);
153
-DECL_ANG(16, 10, sse4);
154
-DECL_ANG(16, 11, sse4);
155
-DECL_ANG(16, 12, sse4);
156
-DECL_ANG(16, 13, sse4);
157
-DECL_ANG(16, 14, sse4);
158
-DECL_ANG(16, 15, sse4);
159
-DECL_ANG(16, 16, sse4);
160
-DECL_ANG(16, 17, sse4);
161
-DECL_ANG(16, 18, sse4);
162
-DECL_ANG(16, 19, sse4);
163
-DECL_ANG(16, 20, sse4);
164
-DECL_ANG(16, 21, sse4);
165
-DECL_ANG(16, 22, sse4);
166
-DECL_ANG(16, 23, sse4);
167
-DECL_ANG(16, 24, sse4);
168
-DECL_ANG(16, 25, sse4);
169
-DECL_ANG(16, 26, sse4);
170
-DECL_ANG(16, 27, sse4);
171
-DECL_ANG(16, 28, sse4);
172
-DECL_ANG(16, 29, sse4);
173
-DECL_ANG(16, 30, sse4);
174
-DECL_ANG(16, 31, sse4);
175
-DECL_ANG(16, 32, sse4);
176
-DECL_ANG(16, 33, sse4);
177
+FUNCDEF_TU_S2(void, intra_pred_planar, sse2, pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
178
+FUNCDEF_TU_S2(void, intra_pred_planar, sse4, pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
179
+FUNCDEF_TU_S2(void, intra_pred_planar, avx2, pixel* dst, intptr_t dstStride, const pixel*srcPix, int, int filter);
180
181
-DECL_ANG(32, 2, ssse3);
182
-DECL_ANG(32, 3, sse4);
183
-DECL_ANG(32, 4, sse4);
184
-DECL_ANG(32, 5, sse4);
185
-DECL_ANG(32, 6, sse4);
186
-DECL_ANG(32, 7, sse4);
187
-DECL_ANG(32, 8, sse4);
188
-DECL_ANG(32, 9, sse4);
189
-DECL_ANG(32, 10, sse4);
190
-DECL_ANG(32, 11, sse4);
191
-DECL_ANG(32, 12, sse4);
192
-DECL_ANG(32, 13, sse4);
193
-DECL_ANG(32, 14, sse4);
194
-DECL_ANG(32, 15, sse4);
195
-DECL_ANG(32, 16, sse4);
196
-DECL_ANG(32, 17, sse4);
197
-DECL_ANG(32, 18, sse4);
198
-DECL_ANG(32, 19, sse4);
199
-DECL_ANG(32, 20, sse4);
200
-DECL_ANG(32, 21, sse4);
201
x265_1.7.tar.gz/source/common/x86/intrapred16.asm -> x265_1.8.tar.gz/source/common/x86/intrapred16.asm
Changed
201
1
2
%assign x x+1
3
%endrep
4
5
-const shuf_mode_13_23, db 0, 0, 14, 15, 6, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
6
-const shuf_mode_14_22, db 14, 15, 10, 11, 4, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
7
-const shuf_mode_15_21, db 12, 13, 8, 9, 4, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
8
-const shuf_mode_16_20, db 2, 3, 0, 1, 14, 15, 12, 13, 8, 9, 6, 7, 2, 3, 0, 1
9
-const shuf_mode_17_19, db 0, 1, 14, 15, 12, 13, 10, 11, 6, 7, 4, 5, 2, 3, 0, 1
10
-const shuf_mode32_18, db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
11
-const pw_punpcklwd, db 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9
12
-const c_mode32_10_0, db 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
13
-
14
-const pw_unpackwdq, times 8 db 0,1
15
-const pw_ang8_12, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 0, 1
16
-const pw_ang8_13, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 8, 9, 0, 1
17
-const pw_ang8_14, db 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 10, 11, 4, 5, 0, 1
18
-const pw_ang8_15, db 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 8, 9, 4, 5, 0, 1
19
-const pw_ang8_16, db 0, 0, 0, 0, 0, 0, 12, 13, 10, 11, 6, 7, 4, 5, 0, 1
20
-const pw_ang8_17, db 0, 0, 14, 15, 12, 13, 10, 11, 8, 9, 4, 5, 2, 3, 0, 1
21
-const pw_swap16, db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
22
+const ang_table_avx2
23
+%assign x 0
24
+%rep 32
25
+ times 8 dw (32-x), x
26
+%assign x x+1
27
+%endrep
28
29
-const pw_ang16_13, db 14, 15, 8, 9, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
30
-const pw_ang16_16, db 0, 0, 0, 0, 0, 0, 10, 11, 8, 9, 6, 7, 2, 3, 0, 1
31
+const pw_ang16_12_24, db 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 14, 15, 0, 1, 0, 1
32
+const pw_ang16_13_23, db 2, 3, 2, 3, 14, 15, 14, 15, 6, 7, 6, 7, 0, 1, 0, 1
33
+const pw_ang16_14_22, db 2, 3, 2, 3, 10, 11, 10, 11, 6, 7, 6, 7, 0, 1, 0, 1
34
+const pw_ang16_15_21, db 12, 13, 12, 13, 8, 9, 8, 9, 4, 5, 4, 5, 0, 1, 0, 1
35
+const pw_ang16_16_20, db 8, 9, 8, 9, 6, 7, 6, 7, 2, 3, 2, 3, 0, 1, 0, 1
36
+
37
+const pw_ang32_12_24, db 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7
38
+const pw_ang32_13_23, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 6, 7, 0, 1
39
+const pw_ang32_14_22, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 6, 7, 0, 1
40
+const pw_ang32_15_21, db 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 8, 9, 4, 5, 0, 1
41
+const pw_ang32_16_20, db 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 6, 7, 2, 3, 0, 1
42
+const pw_ang32_17_19_0, db 0, 0, 0, 0, 12, 13, 10, 11, 8, 9, 6, 7, 2, 3, 0, 1
43
+
44
+const shuf_mode_13_23, db 0, 0, 14, 15, 6, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
45
+const shuf_mode_14_22, db 14, 15, 10, 11, 4, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
46
+const shuf_mode_15_21, db 12, 13, 8, 9, 4, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
47
+const shuf_mode_16_20, db 2, 3, 0, 1, 14, 15, 12, 13, 8, 9, 6, 7, 2, 3, 0, 1
48
+const shuf_mode_17_19, db 0, 1, 14, 15, 12, 13, 10, 11, 6, 7, 4, 5, 2, 3, 0, 1
49
+const shuf_mode32_18, db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
50
+const pw_punpcklwd, db 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9
51
+const c_mode32_10_0, db 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
52
+
53
+const pw_ang8_12, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 0, 1
54
+const pw_ang8_13, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 8, 9, 0, 1
55
+const pw_ang8_14, db 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 10, 11, 4, 5, 0, 1
56
+const pw_ang8_15, db 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 8, 9, 4, 5, 0, 1
57
+const pw_ang8_16, db 0, 0, 0, 0, 0, 0, 12, 13, 10, 11, 6, 7, 4, 5, 0, 1
58
+const pw_ang8_17, db 0, 0, 14, 15, 12, 13, 10, 11, 8, 9, 4, 5, 2, 3, 0, 1
59
+const pw_swap16, times 2 db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
60
+
61
+const pw_ang16_13, db 14, 15, 8, 9, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
62
+const pw_ang16_16, db 0, 0, 0, 0, 0, 0, 10, 11, 8, 9, 6, 7, 2, 3, 0, 1
63
+
64
+intra_filter4_shuf0: db 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13
65
+intra_filter4_shuf1: db 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13
66
+intra_filter4_shuf2: times 2 db 4, 5, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
67
68
;; (blkSize - 1 - x)
69
-pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0
70
-pw_planar4_1: dw 3, 3, 3, 3, 3, 3, 3, 3
71
-pw_planar8_0: dw 7, 6, 5, 4, 3, 2, 1, 0
72
-pw_planar8_1: dw 7, 7, 7, 7, 7, 7, 7, 7
73
-pw_planar16_0: dw 15, 14, 13, 12, 11, 10, 9, 8
74
-pw_planar16_1: dw 15, 15, 15, 15, 15, 15, 15, 15
75
-pd_planar32_1: dd 31, 31, 31, 31
76
-
77
-pw_planar32_1: dw 31, 31, 31, 31, 31, 31, 31, 31
78
-pw_planar32_L: dw 31, 30, 29, 28, 27, 26, 25, 24
79
-pw_planar32_H: dw 23, 22, 21, 20, 19, 18, 17, 16
80
+pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0
81
82
const planar32_table
83
%assign x 31
84
85
86
SECTION .text
87
88
+cextern pb_01
89
cextern pw_1
90
cextern pw_2
91
+cextern pw_3
92
+cextern pw_7
93
cextern pw_4
94
cextern pw_8
95
+cextern pw_15
96
cextern pw_16
97
+cextern pw_31
98
cextern pw_32
99
-cextern pw_1023
100
cextern pd_16
101
+cextern pd_31
102
cextern pd_32
103
cextern pw_4096
104
+cextern pw_pixel_max
105
cextern multiL
106
cextern multiH
107
cextern multiH2
108
109
cextern pw_swap
110
cextern pb_unpackwq1
111
cextern pb_unpackwq2
112
+cextern pw_planar16_mul
113
+cextern pw_planar32_mul
114
115
;-----------------------------------------------------------------------------------
116
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
117
118
test r4d, r4d
119
120
paddw m0, [pw_4]
121
- psraw m0, 3
122
+ psrlw m0, 3
123
124
; store DC 4x4
125
movh [r0], m0
126
127
; filter top
128
movh m1, [r2 + 2]
129
paddw m1, m0
130
- psraw m1, 2
131
+ psrlw m1, 2
132
movh [r0], m1 ; overwrite top-left pixel, we will update it later
133
134
; filter top-left
135
136
; filter left
137
movu m1, [r2 + 20]
138
paddw m1, m0
139
- psraw m1, 2
140
+ psrlw m1, 2
141
movd r3d, m1
142
mov [r0 + r1 * 2], r3w
143
shr r3d, 16
144
145
pmaddwd m0, [pw_1]
146
147
paddw m0, [pw_8]
148
- psraw m0, 4 ; sum = sum / 16
149
+ psrlw m0, 4 ; sum = sum / 16
150
pshuflw m0, m0, 0
151
pshufd m0, m0, 0 ; m0 = word [dc_val ...]
152
153
154
; filter top
155
movu m0, [r2 + 2]
156
paddw m0, m1
157
- psraw m0, 2
158
+ psrlw m0, 2
159
movu [r0], m0
160
161
; filter top-left
162
163
; filter left
164
movu m0, [r2 + 36]
165
paddw m0, m1
166
- psraw m0, 2
167
+ psrlw m0, 2
168
movh r3, m0
169
mov [r0 + r1 * 2], r3w
170
shr r3, 16
171
172
paddw m0, m1
173
paddw m2, m3
174
paddw m0, m2
175
- movhlps m1, m0
176
- paddw m0, m1
177
- pshuflw m1, m0, 0x6E
178
- paddw m0, m1
179
- pmaddwd m0, [pw_1]
180
+ HADDUW m0, m1
181
+ paddd m0, [pd_16]
182
+ psrld m0, 5
183
184
- paddw m0, [pw_16]
185
- psraw m0, 5
186
movd r5d, m0
187
pshuflw m0, m0, 0 ; m0 = word [dc_val ...]
188
pshufd m0, m0, 0
189
190
; filter top
191
movu m2, [r2 + 2]
192
paddw m2, m1
193
- psraw m2, 2
194
+ psrlw m2, 2
195
movu [r0], m2
196
movu m3, [r2 + 18]
197
paddw m3, m1
198
- psraw m3, 2
199
+ psrlw m3, 2
200
movu [r0 + 16], m3
201
x265_1.7.tar.gz/source/common/x86/intrapred8.asm -> x265_1.8.tar.gz/source/common/x86/intrapred8.asm
Changed
201
1
2
intra_pred_shuff_0_8: times 2 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
3
intra_pred_shuff_15_0: times 2 db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
4
5
+intra_filter4_shuf0: times 2 db 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
6
+intra_filter4_shuf1: times 2 db 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
7
+intra_filter4_shuf2: times 2 db 4, 5, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8
+
9
pb_0_8 times 8 db 0, 8
10
pb_unpackbw1 times 2 db 1, 8, 2, 8, 3, 8, 4, 8
11
pb_swap8: times 2 db 7, 6, 5, 4, 3, 2, 1, 0
12
13
intra_pred_shuff_0_15: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 15
14
15
ALIGN 32
16
-c_ang16_mode_8: db 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13
17
- db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
18
- db 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23
19
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
20
- db 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1
21
- db 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6
22
- db 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11
23
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
24
-
25
-ALIGN 32
26
c_ang16_mode_29: db 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
27
db 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27
28
db 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13
29
30
db 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
31
32
ALIGN 32
33
-c_ang16_mode_7: db 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17
34
- db 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26
35
- db 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3
36
- db 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
37
- db 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21
38
- db 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30
39
- db 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7
40
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
41
-
42
-ALIGN 32
43
c_ang16_mode_30: db 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26
44
db 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20
45
db 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
46
47
db 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
48
db 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
49
50
-
51
-
52
-ALIGN 32
53
-c_ang16_mode_6: db 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21
54
- db 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2
55
- db 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15
56
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
57
- db 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9
58
- db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
59
- db 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3
60
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
61
-
62
ALIGN 32
63
c_ang16_mode_31: db 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17
64
db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19
65
66
db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31
67
db 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
68
69
-
70
-ALIGN 32
71
-c_ang16_mode_5: db 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25
72
- db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10
73
- db 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27
74
- db 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
75
- db 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29
76
- db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
77
- db 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31
78
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
79
-
80
-ALIGN 32
81
-c_ang16_mode_32: db 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21
82
- db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31
83
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20
84
- db 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30
85
- db 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19
86
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29
87
- db 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
88
- db 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
89
- db 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17
90
- db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27
91
- db 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
92
-
93
-ALIGN 32
94
-c_ang16_mode_4: db 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29
95
- db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
96
- db 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7
97
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
98
- db 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17
99
- db 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6
100
- db 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 13, 19, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27
101
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
102
-
103
-ALIGN 32
104
-c_ang16_mode_33: db 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26
105
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20
106
- db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
107
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8
108
- db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
109
- db 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
110
- db 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
111
- db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10
112
- db 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30
113
- db 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
114
- db 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
115
- db 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
116
- db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6
117
- db 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0
118
-
119
-ALIGN 32
120
-c_ang16_mode_3: db 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10
121
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4
122
- db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30
123
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
124
- db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
125
- db 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
126
- db 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6
127
- db 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0
128
-
129
ALIGN 32
130
c_ang16_mode_24: db 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
131
db 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
132
133
db 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11, 21, 11
134
db 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0
135
136
-
137
-ALIGN 32
138
-c_ang32_mode_33: db 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26
139
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20
140
- db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
141
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8
142
- db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
143
- db 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
144
- db 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
145
- db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10
146
- db 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30
147
- db 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
148
- db 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
149
- db 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
150
- db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6
151
- db 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26
152
- db 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20
153
- db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
154
- db 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8
155
- db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
156
- db 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
157
- db 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
158
- db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10
159
- db 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30
160
- db 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
161
- db 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18
162
- db 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
163
- db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6
164
- db 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0
165
-
166
-
167
-
168
ALIGN 32
169
c_ang32_mode_25: db 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
170
db 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
171
172
db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4
173
db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0
174
175
-
176
-
177
ALIGN 32
178
c_ang32_mode_24: db 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22
179
db 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 15, 17, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12
180
181
ALIGN 32
182
;; (blkSize - 1 - x)
183
pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0
184
-pw_planar4_1: dw 3, 3, 3, 3, 3, 3, 3, 3
185
-pw_planar8_0: dw 7, 6, 5, 4, 3, 2, 1, 0
186
-pw_planar8_1: dw 7, 7, 7, 7, 7, 7, 7, 7
187
-pw_planar16_0: dw 15, 14, 13, 12, 11, 10, 9, 8
188
-pw_planar16_1: dw 15, 15, 15, 15, 15, 15, 15, 15
189
-pw_planar32_1: dw 31, 31, 31, 31, 31, 31, 31, 31
190
-pw_planar32_L: dw 31, 30, 29, 28, 27, 26, 25, 24
191
-pw_planar32_H: dw 23, 22, 21, 20, 19, 18, 17, 16
192
-
193
ALIGN 32
194
c_ang8_mode_13: db 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
195
db 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
196
197
%assign x x+1
198
%endrep
199
200
+const ang_table_avx2
201
x265_1.7.tar.gz/source/common/x86/ipfilter16.asm -> x265_1.8.tar.gz/source/common/x86/ipfilter16.asm
Changed
201
1
2
;*
3
;* Authors: Nabajit Deka <nabajit@multicorewareinc.com>
4
;* Murugan Vairavel <murugan@multicorewareinc.com>
5
+;* Min Chen <chenm003@163.com>
6
;*
7
;* This program is free software; you can redistribute it and/or modify
8
;* it under the terms of the GNU General Public License as published by
9
10
%include "x86inc.asm"
11
%include "x86util.asm"
12
13
+
14
+%define INTERP_OFFSET_PP pd_32
15
+%define INTERP_SHIFT_PP 6
16
+
17
+%if BIT_DEPTH == 10
18
+ %define INTERP_SHIFT_PS 2
19
+ %define INTERP_OFFSET_PS pd_n32768
20
+ %define INTERP_SHIFT_SP 10
21
+ %define INTERP_OFFSET_SP pd_524800
22
+%elif BIT_DEPTH == 12
23
+ %define INTERP_SHIFT_PS 4
24
+ %define INTERP_OFFSET_PS pd_n131072
25
+ %define INTERP_SHIFT_SP 8
26
+ %define INTERP_OFFSET_SP pd_524416
27
+%else
28
+ %error Unsupport bit depth!
29
+%endif
30
+
31
+
32
SECTION_RODATA 32
33
34
-tab_c_32: times 4 dd 32
35
-tab_c_n32768: times 4 dd -32768
36
+tab_c_32: times 8 dd 32
37
tab_c_524800: times 4 dd 524800
38
tab_c_n8192: times 8 dw -8192
39
pd_524800: times 8 dd 524800
40
41
dw -2, 16, 54, -4
42
dw -2, 10, 58, -2
43
44
-tab_ChromaCoeffV: times 4 dw 0, 64
45
- times 4 dw 0, 0
46
+const tab_ChromaCoeffV, times 8 dw 0, 64
47
+ times 8 dw 0, 0
48
+
49
+ times 8 dw -2, 58
50
+ times 8 dw 10, -2
51
+
52
+ times 8 dw -4, 54
53
+ times 8 dw 16, -2
54
+
55
+ times 8 dw -6, 46
56
+ times 8 dw 28, -4
57
+
58
+ times 8 dw -4, 36
59
+ times 8 dw 36, -4
60
61
- times 4 dw -2, 58
62
- times 4 dw 10, -2
63
+ times 8 dw -4, 28
64
+ times 8 dw 46, -6
65
66
- times 4 dw -4, 54
67
- times 4 dw 16, -2
68
+ times 8 dw -2, 16
69
+ times 8 dw 54, -4
70
71
- times 4 dw -6, 46
72
- times 4 dw 28, -4
73
+ times 8 dw -2, 10
74
+ times 8 dw 58, -2
75
76
- times 4 dw -4, 36
77
- times 4 dw 36, -4
78
+tab_ChromaCoeffVer: times 8 dw 0, 64
79
+ times 8 dw 0, 0
80
81
- times 4 dw -4, 28
82
- times 4 dw 46, -6
83
+ times 8 dw -2, 58
84
+ times 8 dw 10, -2
85
86
- times 4 dw -2, 16
87
- times 4 dw 54, -4
88
+ times 8 dw -4, 54
89
+ times 8 dw 16, -2
90
91
- times 4 dw -2, 10
92
- times 4 dw 58, -2
93
+ times 8 dw -6, 46
94
+ times 8 dw 28, -4
95
+
96
+ times 8 dw -4, 36
97
+ times 8 dw 36, -4
98
+
99
+ times 8 dw -4, 28
100
+ times 8 dw 46, -6
101
+
102
+ times 8 dw -2, 16
103
+ times 8 dw 54, -4
104
+
105
+ times 8 dw -2, 10
106
+ times 8 dw 58, -2
107
108
tab_LumaCoeff: dw 0, 0, 0, 64, 0, 0, 0, 0
109
dw -1, 4, -10, 58, 17, -5, 1, 0
110
111
112
const interp8_hps_shuf, dd 0, 4, 1, 5, 2, 6, 3, 7
113
114
+const interp8_hpp_shuf, db 0, 1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 7, 8, 9
115
+ db 4, 5, 6, 7, 8, 9, 10, 11, 6, 7, 8, 9, 10, 11, 12, 13
116
+
117
+const pb_shuf, db 0, 1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 7, 8, 9
118
+ db 4, 5, 6, 7, 8, 9, 10, 11, 6, 7, 8, 9, 10, 11, 12, 13
119
+
120
+
121
SECTION .text
122
+cextern pd_8
123
cextern pd_32
124
cextern pw_pixel_max
125
+cextern pd_524416
126
cextern pd_n32768
127
+cextern pd_n131072
128
cextern pw_2000
129
+cextern idct8_shuf2
130
+
131
+%macro FILTER_LUMA_HOR_4_sse2 1
132
+ movu m4, [r0 + %1] ; m4 = src[0-7]
133
+ movu m5, [r0 + %1 + 2] ; m5 = src[1-8]
134
+ pmaddwd m4, m0
135
+ pmaddwd m5, m0
136
+ pshufd m2, m4, q2301
137
+ paddd m4, m2
138
+ pshufd m2, m5, q2301
139
+ paddd m5, m2
140
+ pshufd m4, m4, q3120
141
+ pshufd m5, m5, q3120
142
+ punpcklqdq m4, m5
143
+
144
+ movu m5, [r0 + %1 + 4] ; m5 = src[2-9]
145
+ movu m3, [r0 + %1 + 6] ; m3 = src[3-10]
146
+ pmaddwd m5, m0
147
+ pmaddwd m3, m0
148
+ pshufd m2, m5, q2301
149
+ paddd m5, m2
150
+ pshufd m2, m3, q2301
151
+ paddd m3, m2
152
+ pshufd m5, m5, q3120
153
+ pshufd m3, m3, q3120
154
+ punpcklqdq m5, m3
155
+
156
+ pshufd m2, m4, q2301
157
+ paddd m4, m2
158
+ pshufd m2, m5, q2301
159
+ paddd m5, m2
160
+ pshufd m4, m4, q3120
161
+ pshufd m5, m5, q3120
162
+ punpcklqdq m4, m5
163
+ paddd m4, m1
164
+%endmacro
165
+
166
+%macro FILTER_LUMA_HOR_8_sse2 1
167
+ movu m4, [r0 + %1] ; m4 = src[0-7]
168
+ movu m5, [r0 + %1 + 2] ; m5 = src[1-8]
169
+ pmaddwd m4, m0
170
+ pmaddwd m5, m0
171
+ pshufd m2, m4, q2301
172
+ paddd m4, m2
173
+ pshufd m2, m5, q2301
174
+ paddd m5, m2
175
+ pshufd m4, m4, q3120
176
+ pshufd m5, m5, q3120
177
+ punpcklqdq m4, m5
178
+
179
+ movu m5, [r0 + %1 + 4] ; m5 = src[2-9]
180
+ movu m3, [r0 + %1 + 6] ; m3 = src[3-10]
181
+ pmaddwd m5, m0
182
+ pmaddwd m3, m0
183
+ pshufd m2, m5, q2301
184
+ paddd m5, m2
185
+ pshufd m2, m3, q2301
186
+ paddd m3, m2
187
+ pshufd m5, m5, q3120
188
+ pshufd m3, m3, q3120
189
+ punpcklqdq m5, m3
190
+
191
+ pshufd m2, m4, q2301
192
+ paddd m4, m2
193
+ pshufd m2, m5, q2301
194
+ paddd m5, m2
195
+ pshufd m4, m4, q3120
196
+ pshufd m5, m5, q3120
197
+ punpcklqdq m4, m5
198
+ paddd m4, m1
199
+
200
+ movu m5, [r0 + %1 + 8] ; m5 = src[4-11]
201
x265_1.7.tar.gz/source/common/x86/ipfilter8.asm -> x265_1.8.tar.gz/source/common/x86/ipfilter8.asm
Changed
201
1
2
cextern pw_32
3
cextern pw_512
4
cextern pw_2000
5
+cextern pw_8192
6
7
%macro FILTER_H4_w2_2_sse2 0
8
pxor m3, m3
9
10
%endmacro
11
12
;-----------------------------------------------------------------------------
13
-; void interp_4tap_horiz_pp_2x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
14
-;-----------------------------------------------------------------------------
15
-INIT_XMM sse3
16
-cglobal interp_4tap_horiz_pp_2x4, 4, 6, 6, src, srcstride, dst, dststride
17
- mov r4d, r4m
18
- mova m5, [pw_32]
19
-
20
-%ifdef PIC
21
- lea r5, [tabw_ChromaCoeff]
22
- movddup m4, [r5 + r4 * 8]
23
-%else
24
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
25
-%endif
26
-
27
- FILTER_H4_w2_2_sse2
28
- lea srcq, [srcq + srcstrideq * 2]
29
- lea dstq, [dstq + dststrideq * 2]
30
- FILTER_H4_w2_2_sse2
31
-
32
- RET
33
-
34
-;-----------------------------------------------------------------------------
35
-; void interp_4tap_horiz_pp_2x8(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
36
+; void interp_4tap_horiz_pp_2xN(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
37
;-----------------------------------------------------------------------------
38
+%macro FILTER_H4_W2xN_sse3 1
39
INIT_XMM sse3
40
-cglobal interp_4tap_horiz_pp_2x8, 4, 6, 6, src, srcstride, dst, dststride
41
- mov r4d, r4m
42
- mova m5, [pw_32]
43
+cglobal interp_4tap_horiz_pp_2x%1, 4, 6, 6, src, srcstride, dst, dststride
44
+ mov r4d, r4m
45
+ mova m5, [pw_32]
46
47
%ifdef PIC
48
- lea r5, [tabw_ChromaCoeff]
49
- movddup m4, [r5 + r4 * 8]
50
+ lea r5, [tabw_ChromaCoeff]
51
+ movddup m4, [r5 + r4 * 8]
52
%else
53
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
54
+ movddup m4, [tabw_ChromaCoeff + r4 * 8]
55
%endif
56
57
%assign x 1
58
-%rep 4
59
+%rep %1/2
60
FILTER_H4_w2_2_sse2
61
-%if x < 4
62
- lea srcq, [srcq + srcstrideq * 2]
63
- lea dstq, [dstq + dststrideq * 2]
64
+%if x < %1/2
65
+ lea srcq, [srcq + srcstrideq * 2]
66
+ lea dstq, [dstq + dststrideq * 2]
67
%endif
68
%assign x x+1
69
%endrep
70
71
RET
72
73
-;-----------------------------------------------------------------------------
74
-; void interp_4tap_horiz_pp_2x16(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
75
-;-----------------------------------------------------------------------------
76
-INIT_XMM sse3
77
-cglobal interp_4tap_horiz_pp_2x16, 4, 6, 6, src, srcstride, dst, dststride
78
- mov r4d, r4m
79
- mova m5, [pw_32]
80
-
81
-%ifdef PIC
82
- lea r5, [tabw_ChromaCoeff]
83
- movddup m4, [r5 + r4 * 8]
84
-%else
85
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
86
-%endif
87
-
88
-%assign x 1
89
-%rep 8
90
- FILTER_H4_w2_2_sse2
91
-%if x < 8
92
- lea srcq, [srcq + srcstrideq * 2]
93
- lea dstq, [dstq + dststrideq * 2]
94
-%endif
95
-%assign x x+1
96
-%endrep
97
+%endmacro
98
99
- RET
100
+ FILTER_H4_W2xN_sse3 4
101
+ FILTER_H4_W2xN_sse3 8
102
+ FILTER_H4_W2xN_sse3 16
103
104
%macro FILTER_H4_w4_2_sse2 0
105
pxor m5, m5
106
107
%endmacro
108
109
;-----------------------------------------------------------------------------
110
-; void interp_4tap_horiz_pp_4x2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
111
-;-----------------------------------------------------------------------------
112
-INIT_XMM sse3
113
-cglobal interp_4tap_horiz_pp_4x2, 4, 6, 8, src, srcstride, dst, dststride
114
- mov r4d, r4m
115
- mova m7, [pw_32]
116
-
117
-%ifdef PIC
118
- lea r5, [tabw_ChromaCoeff]
119
- movddup m4, [r5 + r4 * 8]
120
-%else
121
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
122
-%endif
123
-
124
- FILTER_H4_w4_2_sse2
125
-
126
- RET
127
-
128
-;-----------------------------------------------------------------------------
129
-; void interp_4tap_horiz_pp_4x4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
130
-;-----------------------------------------------------------------------------
131
-INIT_XMM sse3
132
-cglobal interp_4tap_horiz_pp_4x4, 4, 6, 8, src, srcstride, dst, dststride
133
- mov r4d, r4m
134
- mova m7, [pw_32]
135
-
136
-%ifdef PIC
137
- lea r5, [tabw_ChromaCoeff]
138
- movddup m4, [r5 + r4 * 8]
139
-%else
140
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
141
-%endif
142
-
143
- FILTER_H4_w4_2_sse2
144
- lea srcq, [srcq + srcstrideq * 2]
145
- lea dstq, [dstq + dststrideq * 2]
146
- FILTER_H4_w4_2_sse2
147
-
148
- RET
149
-
150
-;-----------------------------------------------------------------------------
151
-; void interp_4tap_horiz_pp_4x8(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
152
-;-----------------------------------------------------------------------------
153
-INIT_XMM sse3
154
-cglobal interp_4tap_horiz_pp_4x8, 4, 6, 8, src, srcstride, dst, dststride
155
- mov r4d, r4m
156
- mova m7, [pw_32]
157
-
158
-%ifdef PIC
159
- lea r5, [tabw_ChromaCoeff]
160
- movddup m4, [r5 + r4 * 8]
161
-%else
162
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
163
-%endif
164
-
165
-%assign x 1
166
-%rep 4
167
- FILTER_H4_w4_2_sse2
168
-%if x < 4
169
- lea srcq, [srcq + srcstrideq * 2]
170
- lea dstq, [dstq + dststrideq * 2]
171
-%endif
172
-%assign x x+1
173
-%endrep
174
-
175
- RET
176
-
177
-;-----------------------------------------------------------------------------
178
-; void interp_4tap_horiz_pp_4x16(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
179
-;-----------------------------------------------------------------------------
180
-INIT_XMM sse3
181
-cglobal interp_4tap_horiz_pp_4x16, 4, 6, 8, src, srcstride, dst, dststride
182
- mov r4d, r4m
183
- mova m7, [pw_32]
184
-
185
-%ifdef PIC
186
- lea r5, [tabw_ChromaCoeff]
187
- movddup m4, [r5 + r4 * 8]
188
-%else
189
- movddup m4, [tabw_ChromaCoeff + r4 * 8]
190
-%endif
191
-
192
-%assign x 1
193
-%rep 8
194
- FILTER_H4_w4_2_sse2
195
-%if x < 8
196
- lea srcq, [srcq + srcstrideq * 2]
197
- lea dstq, [dstq + dststrideq * 2]
198
-%endif
199
-%assign x x+1
200
-%endrep
201
x265_1.7.tar.gz/source/common/x86/ipfilter8.h -> x265_1.8.tar.gz/source/common/x86/ipfilter8.h
Changed
201
1
2
#ifndef X265_IPFILTER8_H
3
#define X265_IPFILTER8_H
4
5
-#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
6
- void x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
7
- void x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); \
8
- void x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
9
- void x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
10
-
11
-#define LUMA_FILTERS(cpu) \
12
- SETUP_LUMA_FUNC_DEF(4, 4, cpu); \
13
- SETUP_LUMA_FUNC_DEF(8, 8, cpu); \
14
- SETUP_LUMA_FUNC_DEF(8, 4, cpu); \
15
- SETUP_LUMA_FUNC_DEF(4, 8, cpu); \
16
- SETUP_LUMA_FUNC_DEF(16, 16, cpu); \
17
- SETUP_LUMA_FUNC_DEF(16, 8, cpu); \
18
- SETUP_LUMA_FUNC_DEF(8, 16, cpu); \
19
- SETUP_LUMA_FUNC_DEF(16, 12, cpu); \
20
- SETUP_LUMA_FUNC_DEF(12, 16, cpu); \
21
- SETUP_LUMA_FUNC_DEF(16, 4, cpu); \
22
- SETUP_LUMA_FUNC_DEF(4, 16, cpu); \
23
- SETUP_LUMA_FUNC_DEF(32, 32, cpu); \
24
- SETUP_LUMA_FUNC_DEF(32, 16, cpu); \
25
- SETUP_LUMA_FUNC_DEF(16, 32, cpu); \
26
- SETUP_LUMA_FUNC_DEF(32, 24, cpu); \
27
- SETUP_LUMA_FUNC_DEF(24, 32, cpu); \
28
- SETUP_LUMA_FUNC_DEF(32, 8, cpu); \
29
- SETUP_LUMA_FUNC_DEF(8, 32, cpu); \
30
- SETUP_LUMA_FUNC_DEF(64, 64, cpu); \
31
- SETUP_LUMA_FUNC_DEF(64, 32, cpu); \
32
- SETUP_LUMA_FUNC_DEF(32, 64, cpu); \
33
- SETUP_LUMA_FUNC_DEF(64, 48, cpu); \
34
- SETUP_LUMA_FUNC_DEF(48, 64, cpu); \
35
- SETUP_LUMA_FUNC_DEF(64, 16, cpu); \
36
- SETUP_LUMA_FUNC_DEF(16, 64, cpu)
37
-
38
-#define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
39
- void x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
40
-
41
-#define LUMA_SP_FILTERS(cpu) \
42
- SETUP_LUMA_SP_FUNC_DEF(4, 4, cpu); \
43
- SETUP_LUMA_SP_FUNC_DEF(8, 8, cpu); \
44
- SETUP_LUMA_SP_FUNC_DEF(8, 4, cpu); \
45
- SETUP_LUMA_SP_FUNC_DEF(4, 8, cpu); \
46
- SETUP_LUMA_SP_FUNC_DEF(16, 16, cpu); \
47
- SETUP_LUMA_SP_FUNC_DEF(16, 8, cpu); \
48
- SETUP_LUMA_SP_FUNC_DEF(8, 16, cpu); \
49
- SETUP_LUMA_SP_FUNC_DEF(16, 12, cpu); \
50
- SETUP_LUMA_SP_FUNC_DEF(12, 16, cpu); \
51
- SETUP_LUMA_SP_FUNC_DEF(16, 4, cpu); \
52
- SETUP_LUMA_SP_FUNC_DEF(4, 16, cpu); \
53
- SETUP_LUMA_SP_FUNC_DEF(32, 32, cpu); \
54
- SETUP_LUMA_SP_FUNC_DEF(32, 16, cpu); \
55
- SETUP_LUMA_SP_FUNC_DEF(16, 32, cpu); \
56
- SETUP_LUMA_SP_FUNC_DEF(32, 24, cpu); \
57
- SETUP_LUMA_SP_FUNC_DEF(24, 32, cpu); \
58
- SETUP_LUMA_SP_FUNC_DEF(32, 8, cpu); \
59
- SETUP_LUMA_SP_FUNC_DEF(8, 32, cpu); \
60
- SETUP_LUMA_SP_FUNC_DEF(64, 64, cpu); \
61
- SETUP_LUMA_SP_FUNC_DEF(64, 32, cpu); \
62
- SETUP_LUMA_SP_FUNC_DEF(32, 64, cpu); \
63
- SETUP_LUMA_SP_FUNC_DEF(64, 48, cpu); \
64
- SETUP_LUMA_SP_FUNC_DEF(48, 64, cpu); \
65
- SETUP_LUMA_SP_FUNC_DEF(64, 16, cpu); \
66
- SETUP_LUMA_SP_FUNC_DEF(16, 64, cpu);
67
-
68
-#define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
69
- void x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
70
-
71
-#define LUMA_SS_FILTERS(cpu) \
72
- SETUP_LUMA_SS_FUNC_DEF(4, 4, cpu); \
73
- SETUP_LUMA_SS_FUNC_DEF(8, 8, cpu); \
74
- SETUP_LUMA_SS_FUNC_DEF(8, 4, cpu); \
75
- SETUP_LUMA_SS_FUNC_DEF(4, 8, cpu); \
76
- SETUP_LUMA_SS_FUNC_DEF(16, 16, cpu); \
77
- SETUP_LUMA_SS_FUNC_DEF(16, 8, cpu); \
78
- SETUP_LUMA_SS_FUNC_DEF(8, 16, cpu); \
79
- SETUP_LUMA_SS_FUNC_DEF(16, 12, cpu); \
80
- SETUP_LUMA_SS_FUNC_DEF(12, 16, cpu); \
81
- SETUP_LUMA_SS_FUNC_DEF(16, 4, cpu); \
82
- SETUP_LUMA_SS_FUNC_DEF(4, 16, cpu); \
83
- SETUP_LUMA_SS_FUNC_DEF(32, 32, cpu); \
84
- SETUP_LUMA_SS_FUNC_DEF(32, 16, cpu); \
85
- SETUP_LUMA_SS_FUNC_DEF(16, 32, cpu); \
86
- SETUP_LUMA_SS_FUNC_DEF(32, 24, cpu); \
87
- SETUP_LUMA_SS_FUNC_DEF(24, 32, cpu); \
88
- SETUP_LUMA_SS_FUNC_DEF(32, 8, cpu); \
89
- SETUP_LUMA_SS_FUNC_DEF(8, 32, cpu); \
90
- SETUP_LUMA_SS_FUNC_DEF(64, 64, cpu); \
91
- SETUP_LUMA_SS_FUNC_DEF(64, 32, cpu); \
92
- SETUP_LUMA_SS_FUNC_DEF(32, 64, cpu); \
93
- SETUP_LUMA_SS_FUNC_DEF(64, 48, cpu); \
94
- SETUP_LUMA_SS_FUNC_DEF(48, 64, cpu); \
95
- SETUP_LUMA_SS_FUNC_DEF(64, 16, cpu); \
96
- SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu);
97
-
98
-#if HIGH_BIT_DEPTH
99
-
100
-#define SETUP_CHROMA_420_VERT_FUNC_DEF(W, H, cpu) \
101
- void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx); \
102
- void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
103
- void x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
104
- void x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
105
-
106
-#define CHROMA_420_VERT_FILTERS(cpu) \
107
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 4, cpu); \
108
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 8, cpu); \
109
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 4, cpu); \
110
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 8, cpu); \
111
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 6, cpu); \
112
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 2, cpu); \
113
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 16, cpu); \
114
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 8, cpu); \
115
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 16, cpu); \
116
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 12, cpu); \
117
- SETUP_CHROMA_420_VERT_FUNC_DEF(12, 16, cpu); \
118
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 4, cpu); \
119
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 16, cpu); \
120
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 32, cpu); \
121
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 16, cpu); \
122
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 32, cpu); \
123
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 24, cpu); \
124
- SETUP_CHROMA_420_VERT_FUNC_DEF(24, 32, cpu); \
125
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 8, cpu); \
126
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 32, cpu)
127
-
128
-#define CHROMA_420_VERT_FILTERS_SSE4(cpu) \
129
- SETUP_CHROMA_420_VERT_FUNC_DEF(2, 4, cpu); \
130
- SETUP_CHROMA_420_VERT_FUNC_DEF(2, 8, cpu); \
131
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 2, cpu); \
132
- SETUP_CHROMA_420_VERT_FUNC_DEF(6, 8, cpu);
133
-
134
-#define CHROMA_422_VERT_FILTERS(cpu) \
135
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 8, cpu); \
136
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 16, cpu); \
137
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 8, cpu); \
138
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 16, cpu); \
139
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 12, cpu); \
140
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 4, cpu); \
141
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 32, cpu); \
142
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 16, cpu); \
143
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 32, cpu); \
144
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 24, cpu); \
145
- SETUP_CHROMA_420_VERT_FUNC_DEF(12, 32, cpu); \
146
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 8, cpu); \
147
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 32, cpu); \
148
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 64, cpu); \
149
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 32, cpu); \
150
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 64, cpu); \
151
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 48, cpu); \
152
- SETUP_CHROMA_420_VERT_FUNC_DEF(24, 64, cpu); \
153
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 16, cpu); \
154
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 64, cpu);
155
-
156
-#define CHROMA_422_VERT_FILTERS_SSE4(cpu) \
157
- SETUP_CHROMA_420_VERT_FUNC_DEF(2, 8, cpu); \
158
- SETUP_CHROMA_420_VERT_FUNC_DEF(2, 16, cpu); \
159
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 4, cpu); \
160
- SETUP_CHROMA_420_VERT_FUNC_DEF(6, 16, cpu);
161
-
162
-#define CHROMA_444_VERT_FILTERS(cpu) \
163
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 8, cpu); \
164
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 4, cpu); \
165
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 8, cpu); \
166
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 16, cpu); \
167
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 8, cpu); \
168
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 16, cpu); \
169
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 12, cpu); \
170
- SETUP_CHROMA_420_VERT_FUNC_DEF(12, 16, cpu); \
171
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 4, cpu); \
172
- SETUP_CHROMA_420_VERT_FUNC_DEF(4, 16, cpu); \
173
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 32, cpu); \
174
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 16, cpu); \
175
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 32, cpu); \
176
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 24, cpu); \
177
- SETUP_CHROMA_420_VERT_FUNC_DEF(24, 32, cpu); \
178
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 8, cpu); \
179
- SETUP_CHROMA_420_VERT_FUNC_DEF(8, 32, cpu); \
180
- SETUP_CHROMA_420_VERT_FUNC_DEF(64, 64, cpu); \
181
- SETUP_CHROMA_420_VERT_FUNC_DEF(64, 32, cpu); \
182
- SETUP_CHROMA_420_VERT_FUNC_DEF(32, 64, cpu); \
183
- SETUP_CHROMA_420_VERT_FUNC_DEF(64, 48, cpu); \
184
- SETUP_CHROMA_420_VERT_FUNC_DEF(48, 64, cpu); \
185
- SETUP_CHROMA_420_VERT_FUNC_DEF(64, 16, cpu); \
186
- SETUP_CHROMA_420_VERT_FUNC_DEF(16, 64, cpu)
187
-
188
-#define SETUP_CHROMA_420_HORIZ_FUNC_DEF(W, H, cpu) \
189
- void x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
190
- void x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
191
-
192
-#define CHROMA_420_HORIZ_FILTERS(cpu) \
193
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(4, 4, cpu); \
194
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(4, 2, cpu); \
195
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(2, 4, cpu); \
196
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(8, 8, cpu); \
197
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(8, 4, cpu); \
198
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(4, 8, cpu); \
199
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(8, 6, cpu); \
200
- SETUP_CHROMA_420_HORIZ_FUNC_DEF(6, 8, cpu); \
201
x265_1.7.tar.gz/source/common/x86/loopfilter.asm -> x265_1.8.tar.gz/source/common/x86/loopfilter.asm
Changed
201
1
2
3
SECTION_RODATA 32
4
pb_31: times 32 db 31
5
+pb_124: times 32 db 124
6
pb_15: times 32 db 15
7
pb_movemask_32: times 32 db 0x00
8
times 32 db 0xFF
9
10
cextern pb_128
11
cextern pb_2
12
cextern pw_2
13
+cextern pw_pixel_max
14
cextern pb_movemask
15
+cextern pw_1
16
+cextern hmul_16p
17
+cextern pb_4
18
19
20
;============================================================================================================
21
; void saoCuOrgE0(pixel * rec, int8_t * offsetEo, int lcuWidth, int8_t* signLeft, intptr_t stride)
22
;============================================================================================================
23
INIT_XMM sse4
24
+%if HIGH_BIT_DEPTH
25
+cglobal saoCuOrgE0, 4,5,9
26
+ mov r4d, r4m
27
+ movh m6, [r1]
28
+ movzx r1d, byte [r3]
29
+ pxor m5, m5
30
+ neg r1b
31
+ movd m0, r1d
32
+ lea r1, [r0 + r4 * 2]
33
+ mov r4d, r2d
34
+
35
+.loop:
36
+ movu m7, [r0]
37
+ movu m8, [r0 + 16]
38
+ movu m2, [r0 + 2]
39
+ movu m1, [r0 + 18]
40
+
41
+ pcmpgtw m3, m7, m2
42
+ pcmpgtw m2, m7
43
+ pcmpgtw m4, m8, m1
44
+ pcmpgtw m1, m8
45
+
46
+ packsswb m3, m4
47
+ packsswb m2, m1
48
+
49
+ pand m3, [pb_1]
50
+ por m3, m2
51
+
52
+ palignr m2, m3, m5, 15
53
+ por m2, m0
54
+
55
+ mova m4, [pw_pixel_max]
56
+ psignb m2, [pb_128] ; m2 = signLeft
57
+ pxor m0, m0
58
+ palignr m0, m3, 15
59
+ paddb m3, m2
60
+ paddb m3, [pb_2] ; m3 = uiEdgeType
61
+ pshufb m2, m6, m3
62
+ pmovsxbw m3, m2 ; offsetEo
63
+ punpckhbw m2, m2
64
+ psraw m2, 8
65
+ paddw m7, m3
66
+ paddw m8, m2
67
+ pmaxsw m7, m5
68
+ pmaxsw m8, m5
69
+ pminsw m7, m4
70
+ pminsw m8, m4
71
+ movu [r0], m7
72
+ movu [r0 + 16], m8
73
+
74
+ add r0q, 32
75
+ sub r2d, 16
76
+ jnz .loop
77
+
78
+ movzx r3d, byte [r3 + 1]
79
+ neg r3b
80
+ movd m0, r3d
81
+.loopH:
82
+ movu m7, [r1]
83
+ movu m8, [r1 + 16]
84
+ movu m2, [r1 + 2]
85
+ movu m1, [r1 + 18]
86
+
87
+ pcmpgtw m3, m7, m2
88
+ pcmpgtw m2, m7
89
+ pcmpgtw m4, m8, m1
90
+ pcmpgtw m1, m8
91
+
92
+ packsswb m3, m4
93
+ packsswb m2, m1
94
+
95
+ pand m3, [pb_1]
96
+ por m3, m2
97
+
98
+ palignr m2, m3, m5, 15
99
+ por m2, m0
100
+
101
+ mova m4, [pw_pixel_max]
102
+ psignb m2, [pb_128] ; m2 = signLeft
103
+ pxor m0, m0
104
+ palignr m0, m3, 15
105
+ paddb m3, m2
106
+ paddb m3, [pb_2] ; m3 = uiEdgeType
107
+ pshufb m2, m6, m3
108
+ pmovsxbw m3, m2 ; offsetEo
109
+ punpckhbw m2, m2
110
+ psraw m2, 8
111
+ paddw m7, m3
112
+ paddw m8, m2
113
+ pmaxsw m7, m5
114
+ pmaxsw m8, m5
115
+ pminsw m7, m4
116
+ pminsw m8, m4
117
+ movu [r1], m7
118
+ movu [r1 + 16], m8
119
+
120
+ add r1q, 32
121
+ sub r4d, 16
122
+ jnz .loopH
123
+ RET
124
+%else ; HIGH_BIT_DEPTH
125
cglobal saoCuOrgE0, 5, 5, 8, rec, offsetEo, lcuWidth, signLeft, stride
126
127
mov r4d, r4m
128
129
sub r4d, 16
130
jnz .loopH
131
RET
132
+%endif
133
134
INIT_YMM avx2
135
+%if HIGH_BIT_DEPTH
136
+cglobal saoCuOrgE0, 4,4,9
137
+ vbroadcasti128 m6, [r1]
138
+ movzx r1d, byte [r3]
139
+ neg r1b
140
+ movd xm0, r1d
141
+ movzx r1d, byte [r3 + 1]
142
+ neg r1b
143
+ movd xm1, r1d
144
+ vinserti128 m0, m0, xm1, 1
145
+ mova m5, [pw_pixel_max]
146
+ mov r1d, r4m
147
+ add r1d, r1d
148
+ shr r2d, 4
149
+
150
+.loop:
151
+ movu m7, [r0]
152
+ movu m8, [r0 + r1]
153
+ movu m2, [r0 + 2]
154
+ movu m1, [r0 + r1 + 2]
155
+
156
+ pcmpgtw m3, m7, m2
157
+ pcmpgtw m2, m7
158
+ pcmpgtw m4, m8, m1
159
+ pcmpgtw m1, m8
160
+
161
+ packsswb m3, m4
162
+ packsswb m2, m1
163
+ vpermq m3, m3, 11011000b
164
+ vpermq m2, m2, 11011000b
165
+
166
+ pand m3, [pb_1]
167
+ por m3, m2
168
+
169
+ pslldq m2, m3, 1
170
+ por m2, m0
171
+
172
+ psignb m2, [pb_128] ; m2 = signLeft
173
+ pxor m0, m0
174
+ palignr m0, m3, 15
175
+ paddb m3, m2
176
+ paddb m3, [pb_2] ; m3 = uiEdgeType
177
+ pshufb m2, m6, m3
178
+ pmovsxbw m3, xm2 ; offsetEo
179
+ vextracti128 xm2, m2, 1
180
+ pmovsxbw m2, xm2
181
+ pxor m4, m4
182
+ paddw m7, m3
183
+ paddw m8, m2
184
+ pmaxsw m7, m4
185
+ pmaxsw m8, m4
186
+ pminsw m7, m5
187
+ pminsw m8, m5
188
+ movu [r0], m7
189
+ movu [r0 + r1], m8
190
+
191
+ add r0q, 32
192
+ dec r2d
193
+ jnz .loop
194
+ RET
195
+%else ; HIGH_BIT_DEPTH
196
cglobal saoCuOrgE0, 5, 5, 7, rec, offsetEo, lcuWidth, signLeft, stride
197
198
mov r4d, r4m
199
200
sub r2d, 16
201
x265_1.7.tar.gz/source/common/x86/loopfilter.h -> x265_1.8.tar.gz/source/common/x86/loopfilter.h
Changed
42
1
2
#ifndef X265_LOOPFILTER_H
3
#define X265_LOOPFILTER_H
4
5
-void x265_saoCuOrgE0_sse4(pixel * rec, int8_t * offsetEo, int endX, int8_t* signLeft, intptr_t stride);
6
-void x265_saoCuOrgE0_avx2(pixel * rec, int8_t * offsetEo, int endX, int8_t* signLeft, intptr_t stride);
7
-void x265_saoCuOrgE1_sse4(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
8
-void x265_saoCuOrgE1_avx2(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
9
-void x265_saoCuOrgE1_2Rows_sse4(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
10
-void x265_saoCuOrgE1_2Rows_avx2(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
11
-void x265_saoCuOrgE2_sse4(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
12
-void x265_saoCuOrgE2_avx2(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
13
-void x265_saoCuOrgE2_32_avx2(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
14
-void x265_saoCuOrgE3_sse4(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX);
15
-void x265_saoCuOrgE3_avx2(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX);
16
-void x265_saoCuOrgE3_32_avx2(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX);
17
-void x265_saoCuOrgB0_sse4(pixel* rec, const int8_t* offsetBo, int ctuWidth, int ctuHeight, intptr_t stride);
18
-void x265_saoCuOrgB0_avx2(pixel* rec, const int8_t* offsetBo, int ctuWidth, int ctuHeight, intptr_t stride);
19
-void x265_calSign_sse4(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
20
-void x265_calSign_avx2(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
21
+#define DECL_SAO(cpu) \
22
+ void PFX(saoCuOrgE0_ ## cpu)(pixel * rec, int8_t * offsetEo, int endX, int8_t* signLeft, intptr_t stride); \
23
+ void PFX(saoCuOrgE1_ ## cpu)(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width); \
24
+ void PFX(saoCuOrgE1_2Rows_ ## cpu)(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width); \
25
+ void PFX(saoCuOrgE2_ ## cpu)(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride); \
26
+ void PFX(saoCuOrgE2_ ## cpu)(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride); \
27
+ void PFX(saoCuOrgE2_32_ ## cpu)(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride); \
28
+ void PFX(saoCuOrgE3_ ## cpu)(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX); \
29
+ void PFX(saoCuOrgE3_32_ ## cpu)(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX); \
30
+ void PFX(saoCuOrgB0_ ## cpu)(pixel* rec, const int8_t* offsetBo, int ctuWidth, int ctuHeight, intptr_t stride); \
31
+ void PFX(saoCuStatsBO_ ## cpu)(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count); \
32
+ void PFX(saoCuStatsE0_ ## cpu)(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count); \
33
+ void PFX(saoCuStatsE1_ ## cpu)(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count); \
34
+ void PFX(saoCuStatsE2_ ## cpu)(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int8_t *upBufft, int endX, int endY, int32_t *stats, int32_t *count); \
35
+ void PFX(saoCuStatsE3_ ## cpu)(const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count); \
36
+ void PFX(calSign_ ## cpu)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
37
+
38
+DECL_SAO(sse4);
39
+DECL_SAO(avx2);
40
41
#endif // ifndef X265_LOOPFILTER_H
42
x265_1.7.tar.gz/source/common/x86/mc-a.asm -> x265_1.8.tar.gz/source/common/x86/mc-a.asm
Changed
201
1
2
%include "x86inc.asm"
3
%include "x86util.asm"
4
5
+%if BIT_DEPTH==8 ; addAvg scaling constants per bit depth: ADDAVG_FACTOR = 1 << BIT_DEPTH, ADDAVG_ROUND = ADDAVG_FACTOR / 2
6
+ %define ADDAVG_FACTOR 256
7
+ %define ADDAVG_ROUND 128
8
+%elif BIT_DEPTH==10 ; values must stay plain decimal literals: they are pasted onto the "pw_" prefix to name a constant table (pw_1024, pw_512, ...)
9
+ %define ADDAVG_FACTOR 1024
10
+ %define ADDAVG_ROUND 512
11
+%elif BIT_DEPTH==12 ; relies on the pw_2048 / pw_4096 tables being declared via cextern
12
+ %define ADDAVG_FACTOR 4096
13
+ %define ADDAVG_ROUND 2048
14
+%else ; any other depth has no matching pw_* table, so stop the build here
15
+ %error Unsupported bit depth!
16
+%endif
17
+
18
SECTION_RODATA 32
19
20
ch_shuf: times 2 db 0,2,2,4,4,6,6,8,1,3,3,5,5,7,7,9
21
22
cextern pw_512
23
cextern pw_1023
24
cextern pw_1024
25
+cextern pw_2048
26
+cextern pw_4096
27
cextern pw_00ff
28
cextern pw_pixel_max
29
-cextern sw_64
30
cextern pd_32
31
-cextern deinterleave_shufd
32
+cextern pd_64
33
34
;====================================================================================================================
35
;void addAvg (int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
36
37
punpcklqdq m1, m2
38
punpcklqdq m3, m5
39
paddw m1, m3
40
- pmulhrsw m1, [pw_1024]
41
- paddw m1, [pw_512]
42
+ pmulhrsw m1, [pw_ %+ ADDAVG_FACTOR]
43
+ paddw m1, [pw_ %+ ADDAVG_ROUND]
44
45
pxor m0, m0
46
pmaxsw m1, m0
47
- pminsw m1, [pw_1023]
48
+ pminsw m1, [pw_pixel_max]
49
movd [r2], m1
50
pextrd [r2 + r5], m1, 1
51
lea r2, [r2 + 2 * r5]
52
pextrd [r2], m1, 2
53
pextrd [r2 + r5], m1, 3
54
-
55
RET
56
+
57
+
58
;-----------------------------------------------------------------------------
59
INIT_XMM sse4
60
cglobal addAvg_2x8, 6,6,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
61
- mova m0, [pw_512]
62
+ mova m0, [pw_ %+ ADDAVG_ROUND]
63
pxor m7, m7
64
add r3, r3
65
add r4, r4
66
67
punpcklqdq m1, m2
68
punpcklqdq m3, m5
69
paddw m1, m3
70
- pmulhrsw m1, [pw_1024]
71
+ pmulhrsw m1, [pw_ %+ ADDAVG_FACTOR]
72
paddw m1, m0
73
74
pmaxsw m1, m7
75
- pminsw m1, [pw_1023]
76
+ pminsw m1, [pw_pixel_max]
77
movd [r2], m1
78
pextrd [r2 + r5], m1, 1
79
lea r2, [r2 + 2 * r5]
80
81
;-----------------------------------------------------------------------------
82
INIT_XMM sse4
83
cglobal addAvg_2x16, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
84
- mova m6, [pw_1023]
85
- mova m7, [pw_1024]
86
+ mova m6, [pw_pixel_max]
87
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
88
mov r6d, 16/4
89
add r3, r3
90
add r4, r4
91
92
punpcklqdq m3, m5
93
paddw m1, m3
94
pmulhrsw m1, m7
95
- paddw m1, [pw_512]
96
+ paddw m1, [pw_ %+ ADDAVG_ROUND]
97
pxor m0, m0
98
pmaxsw m1, m0
99
pminsw m1, m6
100
101
punpcklqdq m0, m1
102
punpcklqdq m2, m3
103
paddw m0, m2
104
- pmulhrsw m0, [pw_1024]
105
- paddw m0, [pw_512]
106
+ pmulhrsw m0, [pw_ %+ ADDAVG_FACTOR]
107
+ paddw m0, [pw_ %+ ADDAVG_ROUND]
108
109
pxor m6, m6
110
pmaxsw m0, m6
111
- pminsw m0, [pw_1023]
112
+ pminsw m0, [pw_pixel_max]
113
movh [r2], m0
114
movhps [r2 + r5], m0
115
RET
116
;-----------------------------------------------------------------------------
117
INIT_XMM sse4
118
cglobal addAvg_6x8, 6,6,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
119
- mova m4, [pw_512]
120
- mova m5, [pw_1023]
121
- mova m7, [pw_1024]
122
+ mova m4, [pw_ %+ ADDAVG_ROUND]
123
+ mova m5, [pw_pixel_max]
124
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
125
pxor m6, m6
126
add r3, r3
127
add r4, r4
128
129
;-----------------------------------------------------------------------------
130
INIT_XMM sse4
131
cglobal addAvg_6x16, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
132
- mova m4, [pw_512]
133
- mova m5, [pw_1023]
134
- mova m7, [pw_1024]
135
+ mova m4, [pw_ %+ ADDAVG_ROUND]
136
+ mova m5, [pw_pixel_max]
137
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
138
pxor m6, m6
139
mov r6d, 16/2
140
add r3, r3
141
142
;-----------------------------------------------------------------------------
143
INIT_XMM sse4
144
cglobal addAvg_8x2, 6,6,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
145
- mova m4, [pw_512]
146
- mova m5, [pw_1023]
147
- mova m7, [pw_1024]
148
+ mova m4, [pw_ %+ ADDAVG_ROUND]
149
+ mova m5, [pw_pixel_max]
150
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
151
pxor m6, m6
152
add r3, r3
153
add r4, r4
154
155
;-----------------------------------------------------------------------------
156
INIT_XMM sse4
157
cglobal addAvg_8x6, 6,6,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
158
- mova m4, [pw_512]
159
- mova m5, [pw_1023]
160
- mova m7, [pw_1024]
161
+ mova m4, [pw_ %+ ADDAVG_ROUND]
162
+ mova m5, [pw_pixel_max]
163
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
164
pxor m6, m6
165
add r3, r3
166
add r4, r4
167
168
%macro ADDAVG_W4_H4 1
169
INIT_XMM sse4
170
cglobal addAvg_4x%1, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
171
- mova m4, [pw_512]
172
- mova m5, [pw_1023]
173
- mova m7, [pw_1024]
174
+ mova m4, [pw_ %+ ADDAVG_ROUND]
175
+ mova m5, [pw_pixel_max]
176
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
177
pxor m6, m6
178
add r3, r3
179
add r4, r4
180
181
%macro ADDAVG_W8_H4 1
182
INIT_XMM sse4
183
cglobal addAvg_8x%1, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
184
- mova m4, [pw_512]
185
- mova m5, [pw_1023]
186
- mova m7, [pw_1024]
187
+ mova m4, [pw_ %+ ADDAVG_ROUND]
188
+ mova m5, [pw_pixel_max]
189
+ mova m7, [pw_ %+ ADDAVG_FACTOR]
190
pxor m6, m6
191
add r3, r3
192
add r4, r4
193
194
%macro ADDAVG_W12_H4 1
195
INIT_XMM sse4
196
cglobal addAvg_12x%1, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride
197
- mova m4, [pw_512]
198
- mova m5, [pw_1023]
199
- mova m7, [pw_1024]
200
+ mova m4, [pw_ %+ ADDAVG_ROUND]
201
x265_1.7.tar.gz/source/common/x86/mc-a2.asm -> x265_1.8.tar.gz/source/common/x86/mc-a2.asm
Changed
82
1
2
%endmacro
3
4
%macro FILT32x4U 4
5
- mova m1, [r0+r5]
6
+ movu m1, [r0+r5]
7
pavgb m0, m1, [r0]
8
movu m3, [r0+r5+1]
9
pavgb m2, m3, [r0+1]
10
11
pavgb m0, m2
12
pavgb m1, m3
13
14
- mova m3, [r0+r5+mmsize]
15
+ movu m3, [r0+r5+mmsize]
16
pavgb m2, m3, [r0+mmsize]
17
movu m5, [r0+r5+1+mmsize]
18
pavgb m4, m5, [r0+1+mmsize]
19
20
vpermq m1, m4, q3120
21
vpermq m2, m2, q3120
22
vpermq m3, m5, q3120
23
- mova [%1], m0
24
- mova [%2], m1
25
- mova [%3], m2
26
- mova [%4], m3
27
+ movu [%1], m0
28
+ movu [%2], m1
29
+ movu [%3], m2
30
+ movu [%4], m3
31
%endmacro
32
33
%macro FILT16x2 4
34
35
%endmacro
36
37
%macro FILT8xA 4
38
- mova m3, [r0+%4+mmsize]
39
- mova m2, [r0+%4]
40
+ movu m3, [r0+%4+mmsize]
41
+ movu m2, [r0+%4]
42
pavgw m3, [r0+%4+r5+mmsize]
43
pavgw m2, [r0+%4+r5]
44
PALIGNR %1, m3, 2, m6
45
46
packssdw m3, %1
47
packssdw m5, m4
48
%endif
49
- mova [%2], m3
50
- mova [%3], m5
51
- mova %1, m2
52
+%if cpuflag(avx2)
53
+ vpermq m3, m3, q3120
54
+ vpermq m5, m5, q3120
55
+%endif
56
+ movu [%2], m3
57
+ movu [%3], m5
58
+ movu %1, m2
59
%endmacro
60
61
;-----------------------------------------------------------------------------
62
63
.vloop:
64
mov r6d, r7m
65
%ifnidn cpuname, mmx2
66
- mova m0, [r0]
67
- mova m1, [r0+r5]
68
+ movu m0, [r0]
69
+ movu m1, [r0+r5]
70
pavgw m0, m1
71
pavgw m1, [r0+r5*2]
72
%endif
73
74
FRAME_INIT_LOWRES
75
INIT_XMM xop
76
FRAME_INIT_LOWRES
77
-%if HIGH_BIT_DEPTH==0
78
+%if ARCH_X86_64 == 1
79
INIT_YMM avx2
80
FRAME_INIT_LOWRES
81
%endif
82
x265_1.7.tar.gz/source/common/x86/mc.h -> x265_1.8.tar.gz/source/common/x86/mc.h
Changed
49
1
2
#define X265_MC_H
3
4
#define LOWRES(cpu) \
5
- void x265_frame_init_lowres_core_ ## cpu(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc, \
6
+ void PFX(frame_init_lowres_core_ ## cpu)(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc, \
7
intptr_t src_stride, intptr_t dst_stride, int width, int height);
8
LOWRES(mmx2)
9
LOWRES(sse2)
10
LOWRES(ssse3)
11
LOWRES(avx)
12
+LOWRES(avx2)
13
LOWRES(xop)
14
15
-#define DECL_SUF(func, args) \
16
- void func ## _mmx2 args; \
17
- void func ## _sse2 args; \
18
- void func ## _ssse3 args;
19
-DECL_SUF(x265_pixel_avg_64x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
20
-DECL_SUF(x265_pixel_avg_64x48, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
21
-DECL_SUF(x265_pixel_avg_64x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
22
-DECL_SUF(x265_pixel_avg_64x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
23
-DECL_SUF(x265_pixel_avg_48x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
24
-DECL_SUF(x265_pixel_avg_32x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
25
-DECL_SUF(x265_pixel_avg_32x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
26
-DECL_SUF(x265_pixel_avg_32x24, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
27
-DECL_SUF(x265_pixel_avg_32x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
28
-DECL_SUF(x265_pixel_avg_32x8, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
29
-DECL_SUF(x265_pixel_avg_24x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
30
-DECL_SUF(x265_pixel_avg_16x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
31
-DECL_SUF(x265_pixel_avg_16x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
32
-DECL_SUF(x265_pixel_avg_16x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
33
-DECL_SUF(x265_pixel_avg_16x12, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
34
-DECL_SUF(x265_pixel_avg_16x8, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
35
-DECL_SUF(x265_pixel_avg_16x4, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
36
-DECL_SUF(x265_pixel_avg_12x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
37
-DECL_SUF(x265_pixel_avg_8x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
38
-DECL_SUF(x265_pixel_avg_8x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
39
-DECL_SUF(x265_pixel_avg_8x8, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
40
-DECL_SUF(x265_pixel_avg_8x4, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
41
-DECL_SUF(x265_pixel_avg_4x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
42
-DECL_SUF(x265_pixel_avg_4x8, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
43
-DECL_SUF(x265_pixel_avg_4x4, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
44
-
45
#undef LOWRES
46
-#undef DECL_SUF
47
48
#endif // ifndef X265_MC_H
49
x265_1.7.tar.gz/source/common/x86/pixel-a.asm -> x265_1.8.tar.gz/source/common/x86/pixel-a.asm
Changed
201
1
2
;* Alex Izvorski <aizvorksi@gmail.com>
3
;* Fiona Glaser <fiona@x264.com>
4
;* Oskar Arvidsson <oskar@irock.se>
5
+;* Min Chen <chenm003@163.com>
6
;*
7
;* This program is free software; you can redistribute it and/or modify
8
;* it under the terms of the GNU General Public License as published by
9
10
%include "x86util.asm"
11
12
SECTION_RODATA 32
13
-hmul_16p: times 16 db 1
14
- times 8 db 1, -1
15
hmul_8p: times 8 db 1
16
times 4 db 1, -1
17
times 8 db 1
18
19
times 2 dw 1, -1
20
times 4 dw 1
21
times 2 dw 1, -1
22
-ALIGN 32
23
-hmul_w: times 2 dw 1, -1, 1, -1, 1, -1, 1, -1
24
+
25
ALIGN 32
26
transd_shuf1: SHUFFLE_MASK_W 0, 8, 2, 10, 4, 12, 6, 14
27
transd_shuf2: SHUFFLE_MASK_W 1, 9, 3, 11, 5, 13, 7, 15
28
29
sw_f0: dq 0xfff0, 0
30
pd_f0: times 4 dd 0xffff0000
31
32
-pw_76543210: dw 0, 1, 2, 3, 4, 5, 6, 7
33
-
34
SECTION .text
35
36
cextern pb_0
37
38
cextern pd_1
39
cextern popcnt_table
40
cextern pd_2
41
+cextern hmul_16p
42
+cextern pb_movemask
43
+cextern pw_pixel_max
44
45
;=============================================================================
46
; SATD
47
48
%endif
49
HADAMARD4_2D 4, 5, 6, 7, 3, %%n
50
paddw m4, m6
51
+;%if HIGH_BIT_DEPTH && (BIT_DEPTH == 12)
52
+; pxor m5, m5
53
+; punpcklwd m6, m4, m5
54
+; punpckhwd m4, m5
55
+; paddd m4, m6
56
+;%endif
57
SWAP %%n, 4
58
%endmacro
59
60
61
HADAMARD 1, max, %2, %4, %6, %7
62
%endif
63
%ifnidn %9, swap
64
+ %if (BIT_DEPTH == 12)
65
+ pxor m%6, m%6
66
+ punpcklwd m%7, m%2, m%6
67
+ punpckhwd m%2, m%6
68
+ paddd m%8, m%7
69
+ paddd m%8, m%2
70
+ %else
71
paddw m%8, m%2
72
+ %endif
73
%else
74
SWAP %8, %2
75
+ %if (BIT_DEPTH == 12)
76
+ pxor m%6, m%6
77
+ punpcklwd m%7, m%8, m%6
78
+ punpckhwd m%8, m%6
79
+ paddd m%8, m%7
80
+ %endif
81
%endif
82
%if %1
83
+ %if (BIT_DEPTH == 12)
84
+ pxor m%6, m%6
85
+ punpcklwd m%7, m%4, m%6
86
+ punpckhwd m%4, m%6
87
+ paddd m%8, m%7
88
+ paddd m%8, m%4
89
+ %else
90
paddw m%8, m%4
91
+ %endif
92
%else
93
HADAMARD 1, max, %3, %5, %6, %7
94
+ %if (BIT_DEPTH == 12)
95
+ pxor m%6, m%6
96
+ punpcklwd m%7, m%3, m%6
97
+ punpckhwd m%3, m%6
98
+ paddd m%8, m%7
99
+ paddd m%8, m%3
100
+ %else
101
paddw m%8, m%3
102
+ %endif
103
%endif
104
%endmacro
105
106
107
%endif
108
109
pxor m%10, m%10
110
- mova m%9, m%2
111
- punpcklwd m%9, m%10
112
+ punpcklwd m%9, m%2, m%10
113
paddd m%8, m%9
114
- mova m%9, m%2
115
- punpckhwd m%9, m%10
116
+ punpckhwd m%9, m%2, m%10
117
paddd m%8, m%9
118
119
%if %1
120
pxor m%10, m%10
121
- mova m%9, m%4
122
- punpcklwd m%9, m%10
123
+ punpcklwd m%9, m%4, m%10
124
paddd m%8, m%9
125
- mova m%9, m%4
126
- punpckhwd m%9, m%10
127
+ punpckhwd m%9, m%4, m%10
128
paddd m%8, m%9
129
%else
130
HADAMARD 1, max, %3, %5, %6, %7
131
pxor m%10, m%10
132
- mova m%9, m%3
133
- punpcklwd m%9, m%10
134
+ punpcklwd m%9, m%3, m%10
135
paddd m%8, m%9
136
- mova m%9, m%3
137
- punpckhwd m%9, m%10
138
+ punpckhwd m%9, m%3, m%10
139
paddd m%8, m%9
140
%endif
141
%endmacro
142
143
movd eax, m0
144
and eax, 0xffff
145
%endif ; HIGH_BIT_DEPTH
146
+ EMMS
147
RET
148
%endmacro
149
150
151
; int pixel_satd_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
152
;-----------------------------------------------------------------------------
153
INIT_MMX mmx2
154
-cglobal pixel_satd_16x4_internal
155
- SATD_4x4_MMX m2, 0, 0
156
- SATD_4x4_MMX m1, 4, 0
157
- paddw m0, m2
158
- SATD_4x4_MMX m2, 8, 0
159
- paddw m0, m1
160
- SATD_4x4_MMX m1, 12, 0
161
- paddw m0, m2
162
- paddw m0, m1
163
- ret
164
-
165
-cglobal pixel_satd_8x8_internal
166
- SATD_4x4_MMX m2, 0, 0
167
- SATD_4x4_MMX m1, 4, 1
168
- paddw m0, m2
169
- paddw m0, m1
170
-pixel_satd_8x4_internal_mmx2:
171
- SATD_4x4_MMX m2, 0, 0
172
- SATD_4x4_MMX m1, 4, 0
173
- paddw m0, m2
174
- paddw m0, m1
175
- ret
176
-
177
-%if HIGH_BIT_DEPTH
178
-%macro SATD_MxN_MMX 3
179
-cglobal pixel_satd_%1x%2, 4,7
180
- SATD_START_MMX
181
- pxor m0, m0
182
- call pixel_satd_%1x%3_internal_mmx2
183
- HADDUW m0, m1
184
- movd r6d, m0
185
-%rep %2/%3-1
186
- pxor m0, m0
187
- lea r0, [r0+4*r1]
188
- lea r2, [r2+4*r3]
189
- call pixel_satd_%1x%3_internal_mmx2
190
- movd m2, r4
191
- HADDUW m0, m1
192
- movd r4, m0
193
- add r6, r4
194
- movd r4, m2
195
-%endrep
196
- movifnidn eax, r6d
197
- RET
198
-%endmacro
199
-
200
-SATD_MxN_MMX 16, 16, 4
201
x265_1.7.tar.gz/source/common/x86/pixel-util.h -> x265_1.8.tar.gz/source/common/x86/pixel-util.h
Changed
150
1
2
#ifndef X265_PIXEL_UTIL_H
3
#define X265_PIXEL_UTIL_H
4
5
-void x265_getResidual4_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
6
-void x265_getResidual8_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
7
-void x265_getResidual16_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
8
-void x265_getResidual16_sse4(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
9
-void x265_getResidual32_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
10
-void x265_getResidual32_sse4(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
11
-void x265_getResidual16_avx2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
12
-void x265_getResidual32_avx2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
13
-
14
-void x265_transpose4_sse2(pixel* dest, const pixel* src, intptr_t stride);
15
-void x265_transpose8_sse2(pixel* dest, const pixel* src, intptr_t stride);
16
-void x265_transpose16_sse2(pixel* dest, const pixel* src, intptr_t stride);
17
-void x265_transpose32_sse2(pixel* dest, const pixel* src, intptr_t stride);
18
-void x265_transpose64_sse2(pixel* dest, const pixel* src, intptr_t stride);
19
-
20
-void x265_transpose8_avx2(pixel* dest, const pixel* src, intptr_t stride);
21
-void x265_transpose16_avx2(pixel* dest, const pixel* src, intptr_t stride);
22
-void x265_transpose32_avx2(pixel* dest, const pixel* src, intptr_t stride);
23
-void x265_transpose64_avx2(pixel* dest, const pixel* src, intptr_t stride);
24
-
25
-uint32_t x265_quant_sse4(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
26
-uint32_t x265_quant_avx2(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
27
-uint32_t x265_nquant_sse4(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
28
-uint32_t x265_nquant_avx2(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
29
-void x265_dequant_normal_sse4(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
30
-void x265_dequant_normal_avx2(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
31
-
32
-int x265_count_nonzero_4x4_ssse3(const int16_t* quantCoeff);
33
-int x265_count_nonzero_8x8_ssse3(const int16_t* quantCoeff);
34
-int x265_count_nonzero_16x16_ssse3(const int16_t* quantCoeff);
35
-int x265_count_nonzero_32x32_ssse3(const int16_t* quantCoeff);
36
-int x265_count_nonzero_4x4_avx2(const int16_t* quantCoeff);
37
-int x265_count_nonzero_8x8_avx2(const int16_t* quantCoeff);
38
-int x265_count_nonzero_16x16_avx2(const int16_t* quantCoeff);
39
-int x265_count_nonzero_32x32_avx2(const int16_t* quantCoeff);
40
-
41
-void x265_weight_pp_sse4(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
42
-void x265_weight_pp_avx2(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
43
-void x265_weight_sp_sse4(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
44
-
45
-void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t* pix1, intptr_t stride1,
46
- const uint8_t* pix2, intptr_t stride2, int sums[2][4]);
47
-void x265_pixel_ssim_4x4x2_core_sse2(const pixel* pix1, intptr_t stride1,
48
- const pixel* pix2, intptr_t stride2, int sums[2][4]);
49
-void x265_pixel_ssim_4x4x2_core_avx(const pixel* pix1, intptr_t stride1,
50
- const pixel* pix2, intptr_t stride2, int sums[2][4]);
51
-float x265_pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width);
52
-float x265_pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width);
53
-
54
-void x265_scale1D_128to64_ssse3(pixel*, const pixel*);
55
-void x265_scale1D_128to64_avx2(pixel*, const pixel*);
56
-void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
57
-void x265_scale2D_64to32_avx2(pixel*, const pixel*, intptr_t);
58
-
59
-int x265_scanPosLast_x64(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
60
-int x265_scanPosLast_avx2_bmi2(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
61
-uint32_t x265_findPosFirstLast_ssse3(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16]);
62
-
63
-#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
64
- void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destride, const pixel* src0, const pixel* src1, intptr_t srcstride0, intptr_t srcstride1); \
65
- void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destride, const pixel* src0, const int16_t* src1, intptr_t srcStride0, intptr_t srcStride1);
66
-
67
-#define CHROMA_420_PIXELSUB_DEF(cpu) \
68
- SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
69
- SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
70
- SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
71
- SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 32, cpu);
72
-
73
-#define CHROMA_422_PIXELSUB_DEF(cpu) \
74
- SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 8, cpu); \
75
- SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 16, cpu); \
76
- SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
77
- SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 64, cpu);
78
-
79
-#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \
80
- void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destride, const pixel* src0, const pixel* src1, intptr_t srcstride0, intptr_t srcstride1); \
81
- void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destride, const pixel* src0, const int16_t* src1, intptr_t srcStride0, intptr_t srcStride1);
82
-
83
-#define LUMA_PIXELSUB_DEF(cpu) \
84
- SETUP_LUMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
85
- SETUP_LUMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
86
- SETUP_LUMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
87
- SETUP_LUMA_PIXELSUB_PS_FUNC(64, 64, cpu);
88
-
89
-LUMA_PIXELSUB_DEF(_sse2);
90
-CHROMA_420_PIXELSUB_DEF(_sse2);
91
-CHROMA_422_PIXELSUB_DEF(_sse2);
92
-
93
-LUMA_PIXELSUB_DEF(_sse4);
94
-CHROMA_420_PIXELSUB_DEF(_sse4);
95
-CHROMA_422_PIXELSUB_DEF(_sse4);
96
-
97
-#define SETUP_LUMA_PIXELVAR_FUNC(W, H, cpu) \
98
- uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(const pixel* pix, intptr_t pixstride);
99
-
100
-#define LUMA_PIXELVAR_DEF(cpu) \
101
- SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
102
- SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
103
- SETUP_LUMA_PIXELVAR_FUNC(32, 32, cpu); \
104
- SETUP_LUMA_PIXELVAR_FUNC(64, 64, cpu);
105
-
106
-LUMA_PIXELVAR_DEF(_sse2);
107
-LUMA_PIXELVAR_DEF(_xop);
108
-LUMA_PIXELVAR_DEF(_avx);
109
-
110
-#undef CHROMA_420_PIXELSUB_DEF
111
-#undef CHROMA_422_PIXELSUB_DEF
112
-#undef LUMA_PIXELSUB_DEF
113
-#undef LUMA_PIXELVAR_DEF
114
-#undef SETUP_CHROMA_PIXELSUB_PS_FUNC
115
-#undef SETUP_LUMA_PIXELSUB_PS_FUNC
116
-#undef SETUP_LUMA_PIXELVAR_FUNC
117
+#define DEFINE_UTILS(cpu) /* declares the utility-kernel prototypes for one instruction-set suffix `cpu` */ \
118
+ /* FUNCDEF_TU_S2 / FUNCDEF_TU are defined outside this view; presumably one prototype per transform size - TODO confirm */ \
119
+ FUNCDEF_TU_S2(void, getResidual, cpu, const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride); \
120
+ FUNCDEF_TU_S2(void, transpose, cpu, pixel* dest, const pixel* src, intptr_t stride); \
121
+ FUNCDEF_TU(int, count_nonzero, cpu, const int16_t* quantCoeff); \
122
+ uint32_t PFX(quant_ ## cpu(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)); \
123
+ uint32_t PFX(nquant_ ## cpu(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)); \
124
+ void PFX(dequant_normal_ ## cpu(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift)); \
125
+ void PFX(dequant_scaling_ ## cpu(const int16_t* src, const int32_t* dequantCoef, int16_t* dst, int num, int mcqp_miper, int shift)); \
126
+ void PFX(weight_pp_ ## cpu(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)); \
127
+ void PFX(weight_sp_ ## cpu(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)); \
128
+ void PFX(scale1D_128to64_ ## cpu(pixel*, const pixel*)); \
129
+ void PFX(scale2D_64to32_ ## cpu(pixel*, const pixel*, intptr_t)); \
130
+ uint32_t PFX(costCoeffRemain_ ## cpu(uint16_t *absCoeff, int numNonZero, int idx)); \
131
+ /* suffix follows `cpu` like every other entry; DEFINE_UTILS(sse2) still yields costC1C2Flag_sse2 */ \
132
+ uint32_t PFX(costC1C2Flag_ ## cpu(uint16_t *absCoeff, intptr_t numNonZero, uint8_t *baseCtxMod, intptr_t ctxOffset));
131
+
132
+DEFINE_UTILS(sse2);
133
+DEFINE_UTILS(ssse3);
134
+DEFINE_UTILS(sse4);
135
+DEFINE_UTILS(avx2);
136
+
137
+#undef DEFINE_UTILS
138
+
139
+void PFX(pixel_ssim_4x4x2_core_sse2(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]));
140
+void PFX(pixel_ssim_4x4x2_core_avx(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]));
141
+float PFX(pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width));
142
+float PFX(pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width));
143
+
144
+int PFX(scanPosLast_x64(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize));
145
+int PFX(scanPosLast_avx2_bmi2(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize));
146
+uint32_t PFX(findPosFirstLast_ssse3(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16]));
147
+uint32_t PFX(costCoeffNxN_sse4(const uint16_t *scan, const coeff_t *coeff, intptr_t trSize, uint16_t *absCoeff, const uint8_t *tabSigCtx, uint32_t scanFlagMask, uint8_t *baseCtx, int offset, int scanPosSigOff, int subPosBase));
148
149
#endif // ifndef X265_PIXEL_UTIL_H
150
x265_1.7.tar.gz/source/common/x86/pixel-util8.asm -> x265_1.8.tar.gz/source/common/x86/pixel-util8.asm
Changed
201
1
2
3
SECTION_RODATA 32
4
5
-%if BIT_DEPTH == 10
6
+%if BIT_DEPTH == 12
7
+ssim_c1: times 4 dd 107321.76 ; .01*.01*4095*4095*64
8
+ssim_c2: times 4 dd 60851437.92 ; .03*.03*4095*4095*64*63
9
+pf_64: times 4 dd 64.0
10
+pf_128: times 4 dd 128.0
11
+%elif BIT_DEPTH == 10
12
ssim_c1: times 4 dd 6697.7856 ; .01*.01*1023*1023*64
13
ssim_c2: times 4 dd 3797644.4352 ; .03*.03*1023*1023*64*63
14
pf_64: times 4 dd 64.0
15
16
times 16 db 0
17
deinterleave_shuf: times 2 db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
18
deinterleave_word_shuf: times 2 db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
19
-hmul_16p: times 16 db 1
20
- times 8 db 1, -1
21
hmulw_16p: times 8 dw 1
22
times 4 dw 1, -1
23
24
-trans8_shuf: dd 0, 4, 1, 5, 2, 6, 3, 7
25
-
26
SECTION .text
27
28
cextern pw_1
29
cextern pw_0_15
30
cextern pb_1
31
+cextern pb_128
32
cextern pw_00ff
33
cextern pw_1023
34
cextern pw_3fff
35
36
cextern pb_16
37
cextern pb_32
38
cextern pb_64
39
+cextern hmul_16p
40
+cextern trans8_shuf
41
+cextern_naked private_prefix %+ _entropyStateBits
42
+cextern pb_movemask
43
44
;-----------------------------------------------------------------------------
45
; void getResidual(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride)
46
47
movd xm6, r4d ; m6 = qbits8
48
49
; fill offset
50
+%if UNIX64 == 0
51
vpbroadcastd m5, r5m ; m5 = add
52
+%else ; Mac
53
+ movd xm5, r5m
54
+ vpbroadcastd m5, xm5 ; m5 = add
55
+%endif
56
57
lea r5, [pw_1]
58
59
60
movd xm6, r4d ; m6 = qbits8
61
62
; fill offset
63
- vpbroadcastd m5, r5m ; m5 = ad
64
+%if UNIX64 == 0
65
+ vpbroadcastd m5, r5m ; m5 = add
66
+%else ; Mac
67
+ movd xm5, r5m
68
+ vpbroadcastd m5, xm5 ; m5 = add
69
+%endif
70
71
lea r5, [pd_1]
72
73
74
75
INIT_YMM avx2
76
cglobal nquant, 3,5,7
77
+%if UNIX64 == 0
78
vpbroadcastd m4, r4m
79
+%else ; Mac
80
+ movd xm4, r4m
81
+ vpbroadcastd m4, xm4
82
+%endif
83
vpbroadcastd m6, [pw_1]
84
mov r4d, r5m
85
pxor m5, m5 ; m7 = numZero
86
87
%if HIGH_BIT_DEPTH
88
cmp r3d, 32767
89
jle .skip
90
- shr r3d, 2
91
- sub r4d, 2
92
+ shr r3d, (BIT_DEPTH - 8)
93
+ sub r4d, (BIT_DEPTH - 8)
94
.skip:
95
%endif
96
movd m0, r4d ; m0 = shift
97
98
jnz .loop
99
RET
100
101
+;----------------------------------------------------------------------------------------------------------------------
102
+;void dequant_scaling(const int16_t* src, const int32_t* dequantCoef, int16_t* dst, int num, int mcqp_miper, int shift)
103
+;----------------------------------------------------------------------------------------------------------------------
104
+INIT_XMM sse4
105
+cglobal dequant_scaling, 6,6,6 ; x86inc: 6 args in r0-r5 (prototype order), 6 GPRs, 6 vector regs (m0-m5)
106
+ add r5d, 4 ; shift += 4; every later "shift" comment refers to this adjusted value
107
+ shr r3d, 3 ; num/8
108
+ cmp r5d, r4d ; adjusted shift vs. mcqp_miper ("per")
109
+ jle .skip ; shift <= per: results are scaled up, take the left-shift path
110
+ sub r5d, r4d ; r5d = shift - per (> 0 on this path)
111
+ mova m0, [pd_1] ; presumably packed dwords of 1 (pd_1 is defined elsewhere)
112
+ movd m1, r5d ; shift - per
113
+ dec r5d
114
+ movd m2, r5d ; shift - per - 1
115
+ pslld m0, m2 ; 1 << shift - per - 1
116
+
117
+.part0: ; rounding right-shift path, 8 coefficients per iteration
118
+ pmovsxwd m2, [r0] ; sign-extend src[0..3] to 32 bit
119
+ pmovsxwd m4, [r0 + 8] ; sign-extend src[4..7] to 32 bit
120
+ movu m3, [r1] ; dequantCoef[0..3]
121
+ movu m5, [r1 + 16] ; dequantCoef[4..7]
122
+ pmulld m2, m3 ; low 32 bits of src * dequantCoef
123
+ pmulld m4, m5
124
+ paddd m2, m0 ; add the rounding offset held in m0
125
+ paddd m4, m0
126
+ psrad m2, m1 ; arithmetic shift right by (shift - per)
127
+ psrad m4, m1
128
+ packssdw m2, m4 ; saturate to int16 and pack the 8 results
129
+ movu [r2], m2 ; store 8 coefficients to dst
130
+
131
+ add r0, 16 ; advance src by 8 int16
132
+ add r1, 32 ; advance dequantCoef by 8 int32
133
+ add r2, 16 ; advance dst by 8 int16
134
+ dec r3d ; NOTE(review): counter is decremented before it is tested, so num is presumably a non-zero multiple of 8 - confirm with callers
135
+ jnz .part0
136
+ jmp .end
137
+
138
+.skip:
139
+ sub r4d, r5d ; per - shift
140
+ movd m0, r4d ; m0 = left-shift count (no rounding offset on this path)
141
+
142
+.part1: ; left-shift path, 8 coefficients per iteration
143
+ pmovsxwd m2, [r0] ; sign-extend src[0..3] to 32 bit
144
+ pmovsxwd m4, [r0 + 8] ; sign-extend src[4..7] to 32 bit
145
+ movu m3, [r1] ; dequantCoef[0..3]
146
+ movu m5, [r1 + 16] ; dequantCoef[4..7]
147
+ pmulld m2, m3 ; low 32 bits of src * dequantCoef
148
+ pmulld m4, m5
149
+ packssdw m2, m4 ; first saturation: clamp the products to int16
150
+ pmovsxwd m1, m2 ; widen results 0..3 back to 32 bit
151
+ psrldq m2, 8 ; bring results 4..7 into the low half
152
+ pmovsxwd m2, m2 ; widen results 4..7 back to 32 bit
153
+ pslld m1, m0 ; shift left by (per - shift)
154
+ pslld m2, m0
155
+ packssdw m1, m2 ; second saturation: clamp the shifted values to int16
156
+ movu [r2], m1 ; store 8 coefficients to dst
157
+
158
+ add r0, 16 ; advance src by 8 int16
159
+ add r1, 32 ; advance dequantCoef by 8 int32
160
+ add r2, 16 ; advance dst by 8 int16
161
+ dec r3d ; same loop-count assumption as in .part0
162
+ jnz .part1
163
+.end:
164
+ RET
165
+
166
+;----------------------------------------------------------------------------------------------------------------------
167
+;void dequant_scaling(const int16_t* src, const int32_t* dequantCoef, int16_t* dst, int num, int mcqp_miper, int shift)
168
+;----------------------------------------------------------------------------------------------------------------------
169
+INIT_YMM avx2
170
+cglobal dequant_scaling, 6,6,6
171
+ add r5d, 4
172
+ shr r3d, 4 ; num/16
173
+ cmp r5d, r4d
174
+ jle .skip
175
+ sub r5d, r4d
176
+ mova m0, [pd_1]
177
+ movd xm1, r5d ; shift - per
178
+ dec r5d
179
+ movd xm2, r5d ; shift - per - 1
180
+ pslld m0, xm2 ; 1 << shift - per - 1
181
+
182
+.part0:
183
+ pmovsxwd m2, [r0]
184
+ pmovsxwd m4, [r0 + 16]
185
+ movu m3, [r1]
186
+ movu m5, [r1 + 32]
187
+ pmulld m2, m3
188
+ pmulld m4, m5
189
+ paddd m2, m0
190
+ paddd m4, m0
191
+ psrad m2, xm1
192
+ psrad m4, xm1
193
+ packssdw m2, m4
194
+ vpermq m2, m2, 11011000b
195
+ movu [r2], m2
196
+
197
+ add r0, 32
198
+ add r1, 64
199
+ add r2, 32
200
+ dec r3d
201
x265_1.7.tar.gz/source/common/x86/pixel.h -> x265_1.8.tar.gz/source/common/x86/pixel.h
Changed
201
1
2
#ifndef X265_I386_PIXEL_H
3
#define X265_I386_PIXEL_H
4
5
-#define DECL_PIXELS(ret, name, suffix, args) \
6
- ret x265_pixel_ ## name ## _16x64_ ## suffix args; \
7
- ret x265_pixel_ ## name ## _16x32_ ## suffix args; \
8
- ret x265_pixel_ ## name ## _16x16_ ## suffix args; \
9
- ret x265_pixel_ ## name ## _16x12_ ## suffix args; \
10
- ret x265_pixel_ ## name ## _16x8_ ## suffix args; \
11
- ret x265_pixel_ ## name ## _16x4_ ## suffix args; \
12
- ret x265_pixel_ ## name ## _8x32_ ## suffix args; \
13
- ret x265_pixel_ ## name ## _8x16_ ## suffix args; \
14
- ret x265_pixel_ ## name ## _8x8_ ## suffix args; \
15
- ret x265_pixel_ ## name ## _8x4_ ## suffix args; \
16
- ret x265_pixel_ ## name ## _4x16_ ## suffix args; \
17
- ret x265_pixel_ ## name ## _4x8_ ## suffix args; \
18
- ret x265_pixel_ ## name ## _4x4_ ## suffix args; \
19
- ret x265_pixel_ ## name ## _32x8_ ## suffix args; \
20
- ret x265_pixel_ ## name ## _32x16_ ## suffix args; \
21
- ret x265_pixel_ ## name ## _32x24_ ## suffix args; \
22
- ret x265_pixel_ ## name ## _24x32_ ## suffix args; \
23
- ret x265_pixel_ ## name ## _32x32_ ## suffix args; \
24
- ret x265_pixel_ ## name ## _32x64_ ## suffix args; \
25
- ret x265_pixel_ ## name ## _64x16_ ## suffix args; \
26
- ret x265_pixel_ ## name ## _64x32_ ## suffix args; \
27
- ret x265_pixel_ ## name ## _64x48_ ## suffix args; \
28
- ret x265_pixel_ ## name ## _64x64_ ## suffix args; \
29
- ret x265_pixel_ ## name ## _48x64_ ## suffix args; \
30
- ret x265_pixel_ ## name ## _24x32_ ## suffix args; \
31
- ret x265_pixel_ ## name ## _12x16_ ## suffix args; \
32
-
33
-#define DECL_X1(name, suffix) \
34
- DECL_PIXELS(int, name, suffix, (const pixel*, intptr_t, const pixel*, intptr_t))
35
-
36
-#define DECL_X1_SS(name, suffix) \
37
- DECL_PIXELS(int, name, suffix, (const int16_t*, intptr_t, const int16_t*, intptr_t))
38
-
39
-#define DECL_X1_SP(name, suffix) \
40
- DECL_PIXELS(int, name, suffix, (const int16_t*, intptr_t, const pixel*, intptr_t))
41
-
42
-#define DECL_X4(name, suffix) \
43
- DECL_PIXELS(void, name ## _x3, suffix, (const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*)) \
44
- DECL_PIXELS(void, name ## _x4, suffix, (const pixel*, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*))
45
-
46
-/* sad-a.asm */
47
-DECL_X1(sad, mmx2)
48
-DECL_X1(sad, sse2)
49
-DECL_X4(sad, sse2_misalign)
50
-DECL_X1(sad, sse3)
51
-DECL_X1(sad, sse2_aligned)
52
-DECL_X1(sad, ssse3)
53
-DECL_X1(sad, ssse3_aligned)
54
-DECL_X1(sad, avx2)
55
-DECL_X1(sad, avx2_aligned)
56
-DECL_X4(sad, mmx2)
57
-DECL_X4(sad, sse2)
58
-DECL_X4(sad, sse3)
59
-DECL_X4(sad, ssse3)
60
-DECL_X4(sad, avx)
61
-DECL_X4(sad, avx2)
62
-DECL_X1(sad, cache32_mmx2);
63
-DECL_X1(sad, cache64_mmx2);
64
-DECL_X1(sad, cache64_sse2);
65
-DECL_X1(sad, cache64_ssse3);
66
-DECL_X4(sad, cache32_mmx2);
67
-DECL_X4(sad, cache64_mmx2);
68
-DECL_X4(sad, cache64_sse2);
69
-DECL_X4(sad, cache64_ssse3);
70
-
71
-/* pixel-a.asm */
72
-DECL_X1(satd, mmx2)
73
-DECL_X1(satd, sse2)
74
-DECL_X1(satd, ssse3)
75
-DECL_X1(satd, ssse3_atom)
76
-DECL_X1(satd, sse4)
77
-DECL_X1(satd, avx)
78
-DECL_X1(satd, xop)
79
-DECL_X1(satd, avx2)
80
-int x265_pixel_satd_16x24_avx(const pixel*, intptr_t, const pixel*, intptr_t);
81
-int x265_pixel_satd_32x48_avx(const pixel*, intptr_t, const pixel*, intptr_t);
82
-int x265_pixel_satd_24x64_avx(const pixel*, intptr_t, const pixel*, intptr_t);
83
-int x265_pixel_satd_8x64_avx(const pixel*, intptr_t, const pixel*, intptr_t);
84
-int x265_pixel_satd_8x12_avx(const pixel*, intptr_t, const pixel*, intptr_t);
85
-int x265_pixel_satd_12x32_avx(const pixel*, intptr_t, const pixel*, intptr_t);
86
-int x265_pixel_satd_4x32_avx(const pixel*, intptr_t, const pixel*, intptr_t);
87
-int x265_pixel_satd_8x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
88
-int x265_pixel_satd_16x4_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
89
-int x265_pixel_satd_16x12_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
90
-int x265_pixel_satd_16x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
91
-int x265_pixel_satd_16x64_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
92
-
93
-DECL_X1(sa8d, mmx2)
94
-DECL_X1(sa8d, sse2)
95
-DECL_X1(sa8d, ssse3)
96
-DECL_X1(sa8d, ssse3_atom)
97
-DECL_X1(sa8d, sse4)
98
-DECL_X1(sa8d, avx)
99
-DECL_X1(sa8d, xop)
100
-DECL_X1(sa8d, avx2)
101
-
102
-/* ssd-a.asm */
103
-DECL_X1(ssd, mmx)
104
-DECL_X1(ssd, mmx2)
105
-DECL_X1(ssd, sse2slow)
106
-DECL_X1(ssd, sse2)
107
-DECL_X1(ssd, ssse3)
108
-DECL_X1(ssd, avx)
109
-DECL_X1(ssd, xop)
110
-DECL_X1(ssd, avx2)
111
-DECL_X1_SS(ssd_ss, mmx)
112
-DECL_X1_SS(ssd_ss, mmx2)
113
-DECL_X1_SS(ssd_ss, sse2slow)
114
-DECL_X1_SS(ssd_ss, sse2)
115
-DECL_X1_SS(ssd_ss, ssse3)
116
-DECL_X1_SS(ssd_ss, sse4)
117
-DECL_X1_SS(ssd_ss, avx)
118
-DECL_X1_SS(ssd_ss, xop)
119
-DECL_X1_SS(ssd_ss, avx2)
120
-DECL_X1_SP(ssd_sp, sse4)
121
-#define DECL_HEVC_SSD(suffix) \
122
- int x265_pixel_ssd_32x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
123
- int x265_pixel_ssd_16x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
124
- int x265_pixel_ssd_32x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
125
- int x265_pixel_ssd_32x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
126
- int x265_pixel_ssd_16x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
127
- int x265_pixel_ssd_32x24_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
128
- int x265_pixel_ssd_24x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
129
- int x265_pixel_ssd_32x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
130
- int x265_pixel_ssd_8x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
131
- int x265_pixel_ssd_16x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
132
- int x265_pixel_ssd_16x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
133
- int x265_pixel_ssd_8x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
134
- int x265_pixel_ssd_16x12_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
135
- int x265_pixel_ssd_16x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
136
- int x265_pixel_ssd_8x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
137
- int x265_pixel_ssd_8x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t);
138
-DECL_HEVC_SSD(sse2)
139
-DECL_HEVC_SSD(ssse3)
140
-DECL_HEVC_SSD(avx)
141
-
142
-int x265_pixel_ssd_12x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
143
-int x265_pixel_ssd_24x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
144
-int x265_pixel_ssd_48x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
145
-int x265_pixel_ssd_64x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
146
-int x265_pixel_ssd_64x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
147
-int x265_pixel_ssd_64x48_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
148
-int x265_pixel_ssd_64x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
149
-
150
-int x265_pixel_ssd_s_4_sse2(const int16_t*, intptr_t);
151
-int x265_pixel_ssd_s_8_sse2(const int16_t*, intptr_t);
152
-int x265_pixel_ssd_s_16_sse2(const int16_t*, intptr_t);
153
-int x265_pixel_ssd_s_32_sse2(const int16_t*, intptr_t);
154
-int x265_pixel_ssd_s_16_avx2(const int16_t*, intptr_t);
155
-int x265_pixel_ssd_s_32_avx2(const int16_t*, intptr_t);
156
-
157
-#define ADDAVG(func) \
158
- void x265_ ## func ## _sse4(const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t); \
159
- void x265_ ## func ## _avx2(const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t);
160
-ADDAVG(addAvg_2x4)
161
-ADDAVG(addAvg_2x8)
162
-ADDAVG(addAvg_4x2);
163
-ADDAVG(addAvg_4x4)
164
-ADDAVG(addAvg_4x8)
165
-ADDAVG(addAvg_4x16)
166
-ADDAVG(addAvg_6x8)
167
-ADDAVG(addAvg_8x2)
168
-ADDAVG(addAvg_8x4)
169
-ADDAVG(addAvg_8x6)
170
-ADDAVG(addAvg_8x8)
171
-ADDAVG(addAvg_8x16)
172
-ADDAVG(addAvg_8x32)
173
-ADDAVG(addAvg_12x16)
174
-ADDAVG(addAvg_16x4)
175
-ADDAVG(addAvg_16x8)
176
-ADDAVG(addAvg_16x12)
177
-ADDAVG(addAvg_16x16)
178
-ADDAVG(addAvg_16x32)
179
-ADDAVG(addAvg_16x64)
180
-ADDAVG(addAvg_24x32)
181
-ADDAVG(addAvg_32x8)
182
-ADDAVG(addAvg_32x16)
183
-ADDAVG(addAvg_32x24)
184
-ADDAVG(addAvg_32x32)
185
-ADDAVG(addAvg_32x64)
186
-ADDAVG(addAvg_48x64)
187
-ADDAVG(addAvg_64x16)
188
-ADDAVG(addAvg_64x32)
189
-ADDAVG(addAvg_64x48)
190
-ADDAVG(addAvg_64x64)
191
-
192
-ADDAVG(addAvg_2x16)
193
-ADDAVG(addAvg_4x32)
194
-ADDAVG(addAvg_6x16)
195
-ADDAVG(addAvg_8x12)
196
-ADDAVG(addAvg_8x64)
197
-ADDAVG(addAvg_12x32)
198
-ADDAVG(addAvg_16x24)
199
-ADDAVG(addAvg_24x64)
200
-ADDAVG(addAvg_32x48)
201
x265_1.7.tar.gz/source/common/x86/sad-a.asm -> x265_1.8.tar.gz/source/common/x86/sad-a.asm
Changed
163
1
2
;* Fiona Glaser <fiona@x264.com>
3
;* Laurent Aimar <fenrir@via.ecp.fr>
4
;* Alex Izvorski <aizvorksi@gmail.com>
5
+;* Min Chen <chenm003@163.com>
6
;*
7
;* This program is free software; you can redistribute it and/or modify
8
;* it under the terms of the GNU General Public License as published by
9
10
SECTION_RODATA 32
11
12
MSK: db 255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0
13
-pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
14
-hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
15
16
SECTION .text
17
18
cextern pb_3
19
cextern pb_shuf8x8c
20
cextern pw_8
21
-cextern sw_64
22
+cextern pd_64
23
24
;=============================================================================
25
; SAD MMX
26
27
%endif
28
%endmacro
29
30
+%macro SAD_X4_START_2x32P_AVX2 0 ; start the x4 SAD accumulators (m0, m1) from the first two rows
31
+ mova m4, [r0] ; row 0 of the block at r0 (aligned load)
32
+ movu m0, [r1]
33
+ movu m2, [r2]
34
+ movu m1, [r3]
35
+ movu m3, [r4]
36
+ psadbw m0, m4 ; byte-wise SAD of each of the four candidates against row 0
37
+ psadbw m2, m4
38
+ psadbw m1, m4
39
+ psadbw m3, m4
40
+ packusdw m0, m2 ; m0 = partial sums for the r1 and r2 candidates, one per dword
41
+ packusdw m1, m3 ; m1 = partial sums for the r3 and r4 candidates, one per dword
42
+; second row: r0 advances by FENC_STRIDE, the candidates by the stride held in r5
43
+ mova m6, [r0+FENC_STRIDE]
44
+ movu m2, [r1+r5]
45
+ movu m4, [r2+r5]
46
+ movu m3, [r3+r5]
47
+ movu m5, [r4+r5]
48
+ psadbw m2, m6
49
+ psadbw m4, m6
50
+ psadbw m3, m6
51
+ psadbw m5, m6
52
+ packusdw m2, m4
53
+ packusdw m3, m5
54
+ paddd m0, m2 ; fold the second row into the running dword sums
55
+ paddd m1, m3
56
+%endmacro
57
+
58
+%macro SAD_X4_2x32P_AVX2 4 ; add two more rows to m0/m1; %1,%3 = offsets from r0, %2,%4 = offsets from r1..r4
59
+ mova m6, [r0+%1] ; first row of the pair from the block at r0 (aligned load)
60
+ movu m2, [r1+%2]
61
+ movu m4, [r2+%2]
62
+ movu m3, [r3+%2]
63
+ movu m5, [r4+%2]
64
+ psadbw m2, m6
65
+ psadbw m4, m6
66
+ psadbw m3, m6
67
+ psadbw m5, m6
68
+ packusdw m2, m4 ; same layout as the accumulators: r1/r2 sums in one register
69
+ packusdw m3, m5 ; r3/r4 sums in the other
70
+ paddd m0, m2
71
+ paddd m1, m3
72
+; second row of the pair, using offsets %3 and %4
73
+ mova m6, [r0+%3]
74
+ movu m2, [r1+%4]
75
+ movu m4, [r2+%4]
76
+ movu m3, [r3+%4]
77
+ movu m5, [r4+%4]
78
+ psadbw m2, m6
79
+ psadbw m4, m6
80
+ psadbw m3, m6
81
+ psadbw m5, m6
82
+ packusdw m2, m4
83
+ packusdw m3, m5
84
+ paddd m0, m2
85
+ paddd m1, m3
86
+%endmacro
87
+
88
+%macro SAD_X4_4x32P_AVX2 2 ; process four rows; %1 = index of this 4-row group, %2 = total number of groups
89
+%if %1==0
90
+ lea r6, [r5*3] ; r6 = 3 * stride, reused by every group for the fourth row
91
+ SAD_X4_START_2x32P_AVX2
92
+%else
93
+ SAD_X4_2x32P_AVX2 FENC_STRIDE*(0+(%1&1)*4), r5*0, FENC_STRIDE*(1+(%1&1)*4), r5*1
94
+%endif
95
+ SAD_X4_2x32P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r5*2, FENC_STRIDE*(3+(%1&1)*4), r6
96
+%if %1 != %2-1
97
+%if (%1&1) != 0
98
+ add r0, 8*FENC_STRIDE ; r0 moves only after odd groups; odd groups address rows 4..7 via the (%1&1)*4 offset
99
+%endif
100
+ lea r1, [r1+4*r5] ; the four candidate pointers advance four rows after every group
101
+ lea r2, [r2+4*r5]
102
+ lea r3, [r3+4*r5]
103
+ lea r4, [r4+4*r5]
104
+%endif
105
+%endmacro
106
+
107
%macro SAD_X3_END_AVX2 0
108
movifnidn r5, r5mp
109
packssdw m0, m1 ; 0 0 1 1 0 0 1 1
110
111
RET
112
%endmacro
113
114
+%macro SAD_X4_32P_END_AVX2 0 ; reduce the m0/m1 accumulators to four dword sums and store them
115
+ mov r0, r6mp ; reload the 7th argument (presumably the scores pointer - confirm); r6 was reused as 3*stride
116
+ vextracti128 xm2, m0, 1 ; upper 128-bit halves of both accumulators
117
+ vextracti128 xm3, m1, 1
118
+ paddd xm0, xm2 ; add upper half to lower half
119
+ paddd xm1, xm3
120
+ phaddd xm0, xm1 ; add adjacent dwords: one total per candidate, in r1,r2,r3,r4 order
121
+ mova [r0], xm0 ; aligned 16-byte store of the four totals
122
+ RET
123
+%endmacro
124
+
125
;-----------------------------------------------------------------------------
126
; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
127
; uint8_t *pix2, intptr_t i_stride, int scores[3] )
128
129
SAD_X%1_4x%2P_AVX2 x, %3/4
130
%assign x x+1
131
%endrep
132
+
133
+ %if (%1==4) && (%2==32)
134
+ SAD_X%1_32P_END_AVX2
135
+ %else
136
SAD_X%1_END_AVX2
137
+ %endif
138
%endmacro
139
140
INIT_YMM avx2
141
142
SAD_X_AVX2 4, 16, 12, 8
143
SAD_X_AVX2 4, 16, 8, 8
144
145
+SAD_X_AVX2 4, 32, 8, 8
146
+SAD_X_AVX2 4, 32, 16, 8
147
+SAD_X_AVX2 4, 32, 24, 8
148
+SAD_X_AVX2 4, 32, 32, 8
149
+SAD_X_AVX2 4, 32, 64, 8
150
+
151
;=============================================================================
152
; SAD cacheline split
153
;=============================================================================
154
155
jle pixel_sad_%1x%2_mmx2
156
and eax, 7
157
shl eax, 3
158
- movd mm6, [sw_64]
159
+ movd mm6, [pd_64]
160
movd mm7, eax
161
psubw mm6, mm7
162
PROLOGUE 4,5
163
x265_1.7.tar.gz/source/common/x86/sad16-a.asm -> x265_1.8.tar.gz/source/common/x86/sad16-a.asm
Changed
201
1
2
;* Authors: Oskar Arvidsson <oskar@irock.se>
3
;* Henrik Gramner <henrik@gramner.com>
4
;* Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
5
+;* Min Chen <chenm003@163.com>
6
;*
7
;* This program is free software; you can redistribute it and/or modify
8
;* it under the terms of the GNU General Public License as published by
9
10
lea r2, [r2+2*r3]
11
paddw m1, m2
12
paddw m3, m4
13
+ %if BIT_DEPTH <= 10
14
paddw m0, m1
15
paddw m0, m3
16
+ %else
17
+ paddw m1, m3
18
+ pmaddwd m1, [pw_1]
19
+ paddd m0, m1
20
+ %endif
21
%endmacro
22
23
%macro SAD_INC_2x8P_MMX 0
24
25
lea r2, [r2+4*r3]
26
paddw m1, m2
27
paddw m3, m4
28
+ %if BIT_DEPTH <= 10
29
paddw m0, m1
30
paddw m0, m3
31
+ %else
32
+ paddw m1, m3
33
+ pmaddwd m1, [pw_1]
34
+ paddd m0, m1
35
+ %endif
36
%endmacro
37
38
%macro SAD_INC_2x4P_MMX 0
39
40
ABSW2 m1, m2, m1, m2, m3, m4
41
lea r0, [r0+4*r1]
42
lea r2, [r2+4*r3]
43
+ %if BIT_DEPTH <= 10
44
paddw m0, m1
45
paddw m0, m2
46
+ %else
47
+ paddw m1, m2
48
+ pmaddwd m1, [pw_1]
49
+ paddd m0, m1
50
+ %endif
51
%endmacro
52
53
;-----------------------------------------------------------------------------
54
55
jg .loop
56
%endif
57
%if %1*%2 == 256
58
+ %if BIT_DEPTH <= 10
59
HADDUW m0, m1
60
+ %else
61
+ HADDD m0, m1
62
+ %endif
63
%else
64
+ %if BIT_DEPTH <= 10
65
HADDW m0, m1
66
+ %else
67
+ HADDD m0, m1
68
+ %endif
69
%endif
70
movd eax, m0
71
RET
72
73
ABSW2 m3, m4, m3, m4, m7, m5
74
paddw m1, m2
75
paddw m3, m4
76
- paddw m0, m1
77
- paddw m0, m3
78
+ paddw m1, m3
79
+ pmaddwd m1, [pw_1]
80
+ paddd m0, m1
81
%else
82
movu m1, [r2]
83
movu m2, [r2+2*r3]
84
85
ABSW2 m1, m2, m1, m2, m3, m4
86
lea r0, [r0+4*r1]
87
lea r2, [r2+4*r3]
88
- paddw m0, m1
89
- paddw m0, m2
90
+ paddw m1, m2
91
+ pmaddwd m1, [pw_1]
92
+ paddd m0, m1
93
%endif
94
%endmacro
95
96
97
ABSW2 m3, m4, m3, m4, m7, m5
98
paddw m1, m2
99
paddw m3, m4
100
- paddw m0, m1
101
- paddw m8, m3
102
+ paddw m1, m3
103
+ pmaddwd m1, [pw_1]
104
+ paddd m0, m1
105
%else
106
movu m1, [r2]
107
movu m2, [r2 + 2 * r3]
108
109
ABSW2 m1, m2, m1, m2, m3, m4
110
lea r0, [r0 + 4 * r1]
111
lea r2, [r2 + 4 * r3]
112
- paddw m0, m1
113
- paddw m8, m2
114
+ paddw m1, m2
115
+ pmaddwd m1, [pw_1]
116
+ paddd m0, m1
117
%endif
118
%endmacro
119
120
121
; int pixel_sad_NxM(uint16_t *, intptr_t, uint16_t *, intptr_t)
122
; ---------------------------------------------------------------------------- -
123
%macro SAD 2
124
-cglobal pixel_sad_%1x%2, 4,5-(%2&4/4),8*(%1/mmsize)
125
+cglobal pixel_sad_%1x%2, 4,5,8
126
pxor m0, m0
127
%if %2 == 4
128
SAD_INC_2ROW %1
129
130
dec r4d
131
jg .loop
132
%endif
133
-%if %2 == 32
134
- HADDUWD m0, m1
135
HADDD m0, m1
136
-%else
137
- HADDW m0, m1
138
-%endif
139
movd eax, xm0
140
RET
141
%endmacro
142
143
; int pixel_sad_Nx64(uint16_t *, intptr_t, uint16_t *, intptr_t)
144
; ---------------------------------------------------------------------------- -
145
%macro SAD_Nx64 1
146
-cglobal pixel_sad_%1x64, 4,5-(64&4/4), 9
147
+cglobal pixel_sad_%1x64, 4,5, 8
148
pxor m0, m0
149
- pxor m8, m8
150
mov r4d, 64 / 2
151
.loop:
152
SAD_INC_2ROW_Nx64 %1
153
dec r4d
154
jg .loop
155
156
- HADDUWD m0, m1
157
- HADDUWD m8, m1
158
HADDD m0, m1
159
- HADDD m8, m1
160
- paddd m0, m8
161
-
162
movd eax, xm0
163
RET
164
%endmacro
165
166
SAD 16, 16
167
SAD 16, 32
168
169
+INIT_YMM avx2
170
+cglobal pixel_sad_16x64, 4,7,4
171
+ pxor m0, m0
172
+ pxor m3, m3
173
+ mov r4d, 64 / 8
174
+ add r3d, r3d
175
+ add r1d, r1d
176
+ lea r5, [r1 * 3]
177
+ lea r6, [r3 * 3]
178
+.loop:
179
+ movu m1, [r2]
180
+ movu m2, [r2 + r3]
181
+ psubw m1, [r0]
182
+ psubw m2, [r0 + r1]
183
+ pabsw m1, m1
184
+ pabsw m2, m2
185
+ paddw m0, m1
186
+ paddw m3, m2
187
+
188
+ movu m1, [r2 + 2 * r3]
189
+ movu m2, [r2 + r6]
190
+ psubw m1, [r0 + 2 * r1]
191
+ psubw m2, [r0 + r5]
192
+ pabsw m1, m1
193
+ pabsw m2, m2
194
+ paddw m0, m1
195
+ paddw m3, m2
196
+
197
+ lea r0, [r0 + 4 * r1]
198
+ lea r2, [r2 + 4 * r3]
199
+
200
+ movu m1, [r2]
201
x265_1.7.tar.gz/source/common/x86/ssd-a.asm -> x265_1.8.tar.gz/source/common/x86/ssd-a.asm
Changed
201
1
2
RET
3
%endmacro
4
5
+; Function to find ssd for 32x16 block, sse2, 12 bit depth
6
+; Defined separately to be called from SSD_ONE_32 macro; adds its 64-bit total into m9 and returns with a plain ret
7
+INIT_XMM sse2
8
+cglobal ssd_ss_32x16
9
+ pxor m8, m8 ; m8 = 32-bit accumulator for this 16-row strip
10
+ mov r4d, 16 ; row counter
11
+.loop:
12
+ movu m0, [r0] ; four registers' worth of 16-bit samples from the first input row
13
+ movu m1, [r0+mmsize]
14
+ movu m2, [r0+2*mmsize]
15
+ movu m3, [r0+3*mmsize]
16
+ movu m4, [r2] ; matching samples from the second input row
17
+ movu m5, [r2+mmsize]
18
+ movu m6, [r2+2*mmsize]
19
+ movu m7, [r2+3*mmsize]
20
+ psubw m0, m4 ; 16-bit differences
21
+ psubw m1, m5
22
+ psubw m2, m6
23
+ psubw m3, m7
24
+ add r0, r1 ; step both inputs one row (r1/r3 are added directly to the pointers)
25
+ add r2, r3
26
+ pmaddwd m0, m0 ; square each difference and add adjacent pairs into dwords
27
+ pmaddwd m1, m1
28
+ pmaddwd m2, m2
29
+ pmaddwd m3, m3
30
+ paddd m2, m3
31
+ paddd m0, m1
32
+ paddd m0, m2
33
+ paddd m8, m0
34
+ dec r4d
35
+ jnz .loop
36
+; widen the four dword sums to qwords before the final reduction
37
+ mova m4, m8
38
+ pxor m5, m5
39
+ punpckldq m8, m5 ; low two dwords zero-extended to qwords
40
+ punpckhdq m4, m5 ; high two dwords zero-extended to qwords
41
+ paddq m4, m8
42
+ movhlps m5, m4 ; bring the upper qword down
43
+ paddq m4, m5 ; low qword of m4 = strip total
44
+ paddq m9, m4 ; accumulate into m9, which the caller owns
45
+ ret
46
+
47
+%macro SSD_ONE_32 0 ; emits pixel_ssd_ss_32x64 as four calls to the 32x16 helper
48
+cglobal pixel_ssd_ss_32x64, 4,7,10
49
+ add r1d, r1d ; double both strides (presumably element counts -> bytes for 16-bit samples - confirm)
50
+ add r3d, r3d
51
+ pxor m9, m9 ; m9 = 64-bit running total shared with ssd_ss_32x16
52
+ xor r4, r4 ; NOTE(review): ssd_ss_32x16 reloads r4d itself, so this clear looks redundant - confirm
53
+ call ssd_ss_32x16 ; each call consumes 16 rows and leaves r0/r2 on the next strip
54
+ call ssd_ss_32x16
55
+ call ssd_ss_32x16
56
+ call ssd_ss_32x16
57
+ movq rax, m9 ; return the 64-bit total
58
+ RET
59
+%endmacro
60
+
61
%macro SSD_TWO 2
62
cglobal pixel_ssd_ss_%1x%2, 4,7,8
63
FIX_STRIDES r1, r3
64
65
movd eax, xm0
66
RET
67
%endmacro
68
+
69
+INIT_YMM avx2
70
+cglobal pixel_ssd_16x16, 4,7,8 ; sum of squared differences over 16 rows, one ymm of 16-bit samples per row
71
+ FIX_STRIDES r1, r3
72
+ lea r5, [3 * r1] ; r5 / r6 = 3 * stride of the first / second input
73
+ lea r6, [3 * r3]
74
+ mov r4d, 4 ; 4 iterations x 4 rows
75
+ pxor m0, m0 ; m0 = dword accumulator
76
+.loop:
77
+ movu m1, [r0] ; rows 0..3 of the first input
78
+ movu m2, [r0 + r1]
79
+ movu m3, [r0 + r1 * 2]
80
+ movu m4, [r0 + r5]
81
+ movu m6, [r2] ; rows 0..1 of the second input
82
+ movu m7, [r2 + r3]
83
+ psubw m1, m6
84
+ psubw m2, m7
85
+ movu m6, [r2 + r3 * 2] ; rows 2..3 of the second input (m6/m7 reused)
86
+ movu m7, [r2 + r6]
87
+ psubw m3, m6
88
+ psubw m4, m7
89
+; advance both inputs by four rows
90
+ lea r0, [r0 + r1 * 4]
91
+ lea r2, [r2 + r3 * 4]
92
+; square the differences and accumulate
93
+ pmaddwd m1, m1 ; squares summed pairwise into dwords
94
+ pmaddwd m2, m2
95
+ pmaddwd m3, m3
96
+ pmaddwd m4, m4
97
+ paddd m1, m2
98
+ paddd m3, m4
99
+ paddd m0, m1
100
+ paddd m0, m3
101
+
102
+ dec r4d
103
+ jg .loop
104
+
105
+ HADDD m0, m5 ; horizontal dword sum; m5 is presumably scratch for the macro - confirm in x86util
106
+ movd eax, xm0 ; 32-bit result
107
+ RET
108
+
109
+INIT_YMM avx2
110
+cglobal pixel_ssd_32x32, 4,7,8 ; sum of squared differences over 32 rows, two ymm of 16-bit samples per row
111
+ add r1, r1 ; double both strides (presumably element counts -> bytes - confirm)
112
+ add r3, r3
113
+ mov r4d, 16 ; 16 iterations x 2 rows
114
+ pxor m0, m0 ; m0 = dword accumulator
115
+.loop:
116
+ movu m1, [r0] ; row 0, both halves, then row 1, both halves, of the first input
117
+ movu m2, [r0 + 32]
118
+ movu m3, [r0 + r1]
119
+ movu m4, [r0 + r1 + 32]
120
+ movu m6, [r2] ; row 0 of the second input
121
+ movu m7, [r2 + 32]
122
+ psubw m1, m6
123
+ psubw m2, m7
124
+ movu m6, [r2 + r3] ; row 1 of the second input (m6/m7 reused)
125
+ movu m7, [r2 + r3 + 32]
126
+ psubw m3, m6
127
+ psubw m4, m7
128
+; advance both inputs by two rows
129
+ lea r0, [r0 + r1 * 2]
130
+ lea r2, [r2 + r3 * 2]
131
+; square the differences and accumulate
132
+ pmaddwd m1, m1 ; squares summed pairwise into dwords
133
+ pmaddwd m2, m2
134
+ pmaddwd m3, m3
135
+ pmaddwd m4, m4
136
+ paddd m1, m2
137
+ paddd m3, m4
138
+ paddd m0, m1
139
+ paddd m0, m3
140
+
141
+ dec r4d
142
+ jg .loop
143
+
144
+ HADDD m0, m5 ; horizontal dword sum; m5 is presumably scratch for the macro - confirm in x86util
145
+ movd eax, xm0 ; 32-bit result
146
+ RET
147
+
148
+INIT_YMM avx2
149
+cglobal pixel_ssd_64x64, 4,7,8 ; sum of squared differences over 64 rows, four ymm of 16-bit samples per row
150
+ FIX_STRIDES r1, r3
151
+ mov r4d, 64 ; one row per iteration
152
+ pxor m0, m0 ; m0 = dword accumulator
153
+.loop:
154
+ movu m1, [r0] ; the four 32-byte pieces of the current row of the first input
155
+ movu m2, [r0+32]
156
+ movu m3, [r0+32*2]
157
+ movu m4, [r0+32*3]
158
+ movu m6, [r2] ; first two pieces of the second input's row
159
+ movu m7, [r2+32]
160
+ psubw m1, m6
161
+ psubw m2, m7
162
+ movu m6, [r2+32*2] ; last two pieces (m6/m7 reused)
163
+ movu m7, [r2+32*3]
164
+ psubw m3, m6
165
+ psubw m4, m7
166
+; advance both inputs by one row
167
+ lea r0, [r0+r1]
168
+ lea r2, [r2+r3]
169
+; square the differences and accumulate
170
+ pmaddwd m1, m1 ; squares summed pairwise into dwords
171
+ pmaddwd m2, m2
172
+ pmaddwd m3, m3
173
+ pmaddwd m4, m4
174
+ paddd m1, m2
175
+ paddd m3, m4
176
+ paddd m0, m1
177
+ paddd m0, m3
178
+
179
+ dec r4d
180
+ jg .loop
181
+
182
+ HADDD m0, m5 ; horizontal dword sum; m5 is presumably scratch for the macro - confirm in x86util
183
+ movd eax, xm0 ; 32-bit result
184
+ RET
185
+
186
INIT_MMX mmx2
187
SSD_ONE 4, 4
188
SSD_ONE 4, 8
189
190
SSD_ONE 32, 16
191
SSD_ONE 32, 24
192
SSD_ONE 32, 32
193
-SSD_ONE 32, 64
194
+
195
+%if BIT_DEPTH <= 10
196
+ SSD_ONE 32, 64
197
+%else
198
+ SSD_ONE_32
199
+%endif
200
+
201
x265_1.7.tar.gz/source/common/x86/x86inc.asm -> x265_1.8.tar.gz/source/common/x86/x86inc.asm
Changed
24
1
2
; to x264-devel@videolan.org .
3
4
%ifndef private_prefix
5
- %define private_prefix x265
6
+ %define private_prefix X265_NS
7
%endif
8
9
%ifndef public_prefix
10
11
%endif
12
%endmacro
13
%endif
14
-
15
-; workaround: vpbroadcastd with register, the yasm will generate wrong code
16
-%macro vpbroadcastd 2
17
- %ifid %2
18
- movd %1 %+ xmm, %2
19
- vpbroadcastd %1, %1 %+ xmm
20
- %else
21
- vpbroadcastd %1, %2
22
- %endif
23
-%endmacro
24
x265_1.7.tar.gz/source/common/x86/x86util.asm -> x265_1.8.tar.gz/source/common/x86/x86util.asm
Changed
17
1
2
%if sizeof%1==32
3
; %3 = abcdefgh ijklmnop (lower address)
4
; %2 = ABCDEFGH IJKLMNOP (higher address)
5
-; vperm2i128 %5, %2, %3, q0003 ; %5 = ijklmnop ABCDEFGH
6
-%if %4 < 16
7
- palignr %1, %5, %3, %4 ; %1 = bcdefghi jklmnopA
8
+ vperm2i128 %4, %1, %2, q0003 ; %4 = ijklmnop ABCDEFGH
9
+%if %3 < 16
10
+ palignr %1, %4, %2, %3 ; %1 = bcdefghi jklmnopA
11
%else
12
- palignr %1, %2, %5, %4-16 ; %1 = pABCDEFG HIJKLMNO
13
+ palignr %1, %2, %4, %3-16 ; %1 = pABCDEFG HIJKLMNO
14
%endif
15
%elif cpuflag(ssse3)
16
%if %0==5
17
x265_1.7.tar.gz/source/common/yuv.cpp -> x265_1.8.tar.gz/source/common/yuv.cpp
Changed
10
1
2
#include "picyuv.h"
3
#include "primitives.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
Yuv::Yuv()
9
{
10
x265_1.7.tar.gz/source/common/yuv.h -> x265_1.8.tar.gz/source/common/yuv.h
Changed
10
1
2
#include "common.h"
3
#include "primitives.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class ShortYuv;
10
x265_1.7.tar.gz/source/compat/getopt/getopt.h -> x265_1.8.tar.gz/source/compat/getopt/getopt.h
Changed
35
1
2
/* Many other libraries have conflicting prototypes for getopt, with
3
differences in the consts, in stdlib.h. To avoid compilation
4
errors, only prototype getopt for the GNU C library. */
5
-extern int getopt (int __argc, char *const *__argv, const char *__shortopts);
6
+extern int getopt (int argc, char *const *argv, const char *shortopts);
7
# else /* not __GNU_LIBRARY__ */
8
extern int getopt ();
9
# endif /* __GNU_LIBRARY__ */
10
11
# ifndef __need_getopt
12
-extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts,
13
- const struct option *__longopts, int32_t *__longind);
14
-extern int getopt_long_only (int __argc, char *const *__argv,
15
- const char *__shortopts,
16
- const struct option *__longopts, int32_t *__longind);
17
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
18
+ const struct option *longopts, int32_t *longind);
19
+extern int getopt_long_only (int argc, char *const *argv,
20
+ const char *shortopts,
21
+ const struct option *longopts, int32_t *longind);
22
23
/* Internal only. Users should not call this directly. */
24
-extern int _getopt_internal (int __argc, char *const *__argv,
25
- const char *__shortopts,
26
- const struct option *__longopts, int32_t *__longind,
27
- int __long_only);
28
+extern int _getopt_internal (int argc, char *const *argv,
29
+ const char *shortopts,
30
+ const struct option *longopts, int32_t *longind,
31
+ int longonly);
32
# endif
33
#else /* not __STDC__ */
34
extern int getopt ();
35
x265_1.7.tar.gz/source/compat/msvc/stdint.h -> x265_1.8.tar.gz/source/compat/msvc/stdint.h
Changed
9
1
2
#if !defined(UINT64_MAX)
3
#include <limits.h>
4
#define UINT64_MAX _UI64_MAX
5
+#define INT16_MAX _I16_MAX
6
#endif
7
8
/* a minimal set of C99 types for use with MSVC (VC9) */
9
x265_1.7.tar.gz/source/encoder/CMakeLists.txt -> x265_1.8.tar.gz/source/encoder/CMakeLists.txt
Changed
22
1
2
add_definitions(/wd4701) # potentially uninitialized local variable 'foo' used
3
endif()
4
5
+if(EXTRA_LIB) # extra libraries are linked in: pass the matching LINKED_* defines to api.cpp only
6
+ if(LINKED_8BIT)
7
+ list(APPEND APIFLAGS "-DLINKED_8BIT=1")
8
+ endif()
9
+ if(LINKED_10BIT)
10
+ list(APPEND APIFLAGS "-DLINKED_10BIT=1")
11
+ endif()
12
+ if(LINKED_12BIT)
13
+ list(APPEND APIFLAGS "-DLINKED_12BIT=1")
14
+ endif()
15
+ string(REPLACE ";" " " APIFLAGSTR "${APIFLAGS}") # COMPILE_FLAGS expects one space-separated string, not a list
16
+ set_source_files_properties(api.cpp PROPERTIES COMPILE_FLAGS "${APIFLAGSTR}") # quoted so an empty flag set is still a valid property value
17
+endif()
18
+
19
add_library(encoder OBJECT ../x265.h
20
analysis.cpp analysis.h
21
search.cpp search.h
22
x265_1.7.tar.gz/source/encoder/analysis.cpp -> x265_1.8.tar.gz/source/encoder/analysis.cpp
Changed
201
1
2
#include "rdcost.h"
3
#include "encoder.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
/* An explanation of rate distortion levels (--rd-level)
9
*
10
11
return;
12
else if (md.bestMode->cu.isIntra(0))
13
{
14
- m_quant.m_tqBypass = true;
15
md.pred[PRED_LOSSLESS].initCosts();
16
md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
17
PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
18
uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
19
checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
20
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
21
- m_quant.m_tqBypass = false;
22
}
23
else
24
{
25
- m_quant.m_tqBypass = true;
26
md.pred[PRED_LOSSLESS].initCosts();
27
md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
28
md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
29
encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
30
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
31
- m_quant.m_tqBypass = false;
32
}
33
}
34
35
36
/* perform Mode task, repeat until no more work is available */
37
do
38
{
39
+ uint32_t refMasks[2] = { 0, 0 };
40
+
41
if (m_param->rdLevel <= 4)
42
{
43
switch (pmode.modes[task])
44
45
break;
46
47
case PRED_2Nx2N:
48
- slave.checkInter_rd0_4(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N);
49
+ slave.checkInter_rd0_4(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
50
if (m_slice->m_sliceType == B_SLICE)
51
slave.checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], pmode.cuGeom);
52
break;
53
54
case PRED_Nx2N:
55
- slave.checkInter_rd0_4(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N);
56
+ slave.checkInter_rd0_4(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
57
break;
58
59
case PRED_2NxN:
60
- slave.checkInter_rd0_4(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN);
61
+ slave.checkInter_rd0_4(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
62
break;
63
64
case PRED_2NxnU:
65
- slave.checkInter_rd0_4(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU);
66
+ slave.checkInter_rd0_4(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
67
break;
68
69
case PRED_2NxnD:
70
- slave.checkInter_rd0_4(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD);
71
+ slave.checkInter_rd0_4(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
72
break;
73
74
case PRED_nLx2N:
75
- slave.checkInter_rd0_4(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N);
76
+ slave.checkInter_rd0_4(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
77
break;
78
79
case PRED_nRx2N:
80
- slave.checkInter_rd0_4(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N);
81
+ slave.checkInter_rd0_4(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
82
break;
83
84
default:
85
86
break;
87
88
case PRED_2Nx2N:
89
- slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N);
90
+ slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
91
md.pred[PRED_BIDIR].rdCost = MAX_INT64;
92
if (m_slice->m_sliceType == B_SLICE)
93
{
94
95
break;
96
97
case PRED_Nx2N:
98
- slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N);
99
+ slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
100
break;
101
102
case PRED_2NxN:
103
- slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN);
104
+ slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
105
break;
106
107
case PRED_2NxnU:
108
- slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU);
109
+ slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
110
break;
111
112
case PRED_2NxnD:
113
- slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD);
114
+ slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
115
break;
116
117
case PRED_nLx2N:
118
- slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N);
119
+ slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
120
break;
121
122
case PRED_nRx2N:
123
- slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N);
124
+ slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
125
break;
126
127
default:
128
129
/* RD selection between merge, inter, bidir and intra */
130
if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
131
{
132
- for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
133
+ uint32_t numPU = bestInter->cu.getNumPartInter(0);
134
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
135
{
136
PredictionUnit pu(bestInter->cu, cuGeom, puIdx);
137
motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true);
138
139
else if (!md.bestMode->cu.m_mergeFlag[0])
140
{
141
/* finally code the best mode selected from SA8D costs */
142
- for (uint32_t puIdx = 0; puIdx < md.bestMode->cu.getNumPartInter(); puIdx++)
143
+ uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
144
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
145
{
146
PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
147
motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
148
149
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
150
}
151
152
-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
153
+uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
154
{
155
uint32_t depth = cuGeom.depth;
156
uint32_t cuAddr = parentCTU.m_cuAddr;
157
158
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
159
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
160
uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
161
-
162
+ bool earlyskip = false;
163
+ bool splitIntra = true;
164
+ uint32_t splitRefs[4] = { 0, 0, 0, 0 };
165
+ /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
166
if (mightNotSplit && depth >= minDepth)
167
{
168
- bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
169
-
170
/* Compute Merge Cost */
171
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
172
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
173
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
174
-
175
- bool earlyskip = false;
176
if (m_param->rdLevel)
177
earlyskip = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
178
+ }
179
+
180
+ bool bNoSplit = false;
181
+ if (md.bestMode)
182
+ {
183
+ bNoSplit = md.bestMode->cu.isSkipped(0);
184
+ if (mightSplit && depth && depth >= minDepth && !bNoSplit)
185
+ bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
186
+ }
187
+
188
+ /* Step 2. Evaluate each of the 4 split sub-blocks in series */
189
+ if (mightSplit && !bNoSplit)
190
+ {
191
+ Mode* splitPred = &md.pred[PRED_SPLIT];
192
+ splitPred->initCosts();
193
+ CUData* splitCU = &splitPred->cu;
194
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
195
+
196
+ uint32_t nextDepth = depth + 1;
197
+ ModeDepth& nd = m_modeDepth[nextDepth];
198
+ invalidateContexts(nextDepth);
199
+ Entropy* nextContext = &m_rqt[depth].cur;
200
+ int nextQP = qp;
201
x265_1.7.tar.gz/source/encoder/analysis.h -> x265_1.8.tar.gz/source/encoder/analysis.h
Changed
32
1
2
#include "entropy.h"
3
#include "search.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class Entropy;
10
11
12
/* full analysis for a P or B slice CU */
13
void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
14
- void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
15
- void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
16
+ uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
17
+ uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
18
19
/* measure merge and skip */
20
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
21
- void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isSkipMode);
22
+ void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isShareMergeCand);
23
24
/* measure inter options */
25
- void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
26
- void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
27
+ void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
28
+ void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
29
30
void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
31
32
x265_1.7.tar.gz/source/encoder/api.cpp -> x265_1.8.tar.gz/source/encoder/api.cpp
Changed
201
1
2
#include "nal.h"
3
#include "bitcost.h"
4
5
-using namespace x265;
6
+/* multilib namespace reflectors */
7
+#if LINKED_8BIT
8
+namespace x265_8bit {
9
+const x265_api* x265_api_get(int bitDepth);
10
+const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
11
+}
12
+#endif
13
+
14
+#if LINKED_10BIT
15
+namespace x265_10bit {
16
+const x265_api* x265_api_get(int bitDepth);
17
+const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
18
+}
19
+#endif
20
+
21
+#if LINKED_12BIT
22
+namespace x265_12bit {
23
+const x265_api* x265_api_get(int bitDepth);
24
+const x265_api* x265_api_query(int bitDepth, int apiVersion, int* err);
25
+}
26
+#endif
27
+
28
+#if EXPORT_C_API
29
+/* these functions are exported as C functions (default) */
30
+using namespace X265_NS;
31
+extern "C" {
32
+#else
33
+/* these functions exist within private namespace (multilib) */
34
+namespace X265_NS {
35
+#endif
36
37
-extern "C"
38
x265_encoder *x265_encoder_open(x265_param *p)
39
{
40
if (!p)
41
return NULL;
42
43
+#if _MSC_VER
44
+#pragma warning(disable: 4127) // conditional expression is constant, yes I know
45
+#endif
46
+
47
+#if HIGH_BIT_DEPTH
48
+ if (X265_DEPTH == 12)
49
+ x265_log(p, X265_LOG_WARNING, "Main12 is HIGHLY experimental, do not use!\n");
50
+ else if (X265_DEPTH != 10 && X265_DEPTH != 12)
51
+#else
52
+ if (X265_DEPTH != 8)
53
+#endif
54
+ {
55
+ x265_log(p, X265_LOG_ERROR, "Build error, internal bit depth mismatch\n");
56
+ return NULL;
57
+ }
58
+
59
Encoder* encoder = NULL;
60
- x265_param* param = x265_param_alloc();
61
- x265_param* latestParam = x265_param_alloc();
62
+ x265_param* param = PARAM_NS::x265_param_alloc();
63
+ x265_param* latestParam = PARAM_NS::x265_param_alloc();
64
if (!param || !latestParam)
65
goto fail;
66
67
memcpy(param, p, sizeof(x265_param));
68
- x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", x265_version_str);
69
- x265_log(param, X265_LOG_INFO, "build info %s\n", x265_build_info_str);
70
+ x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", PFX(version_str));
71
+ x265_log(param, X265_LOG_INFO, "build info %s\n", PFX(build_info_str));
72
73
- x265_setup_primitives(param, param->cpuid);
74
+ x265_setup_primitives(param);
75
76
if (x265_check_params(param))
77
goto fail;
78
79
80
encoder = new Encoder;
81
if (!param->rc.bEnableSlowFirstPass)
82
- x265_param_apply_fastfirstpass(param);
83
+ PARAM_NS::x265_param_apply_fastfirstpass(param);
84
85
// may change params for auto-detect, etc
86
encoder->configure(param);
87
88
89
fail:
90
delete encoder;
91
- x265_param_free(param);
92
- x265_param_free(latestParam);
93
+ PARAM_NS::x265_param_free(param);
94
+ PARAM_NS::x265_param_free(latestParam);
95
return NULL;
96
}
97
98
-extern "C"
99
int x265_encoder_headers(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal)
100
{
101
if (pp_nal && enc)
102
103
return -1;
104
}
105
106
-extern "C"
107
void x265_encoder_parameters(x265_encoder *enc, x265_param *out)
108
{
109
if (enc && out)
110
111
}
112
}
113
114
-extern "C"
115
int x265_encoder_reconfig(x265_encoder* enc, x265_param* param_in)
116
{
117
if (!enc || !param_in)
118
119
return ret;
120
}
121
122
-extern "C"
123
int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
124
{
125
if (!enc)
126
127
return numEncoded;
128
}
129
130
-extern "C"
131
void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats, uint32_t statsSizeBytes)
132
{
133
if (enc && outputStats)
134
135
}
136
}
137
138
-extern "C"
139
-void x265_encoder_log(x265_encoder* enc, int argc, char **argv)
140
+void x265_encoder_log(x265_encoder* enc, int, char **)
141
{
142
if (enc)
143
{
144
Encoder *encoder = static_cast<Encoder*>(enc);
145
- encoder->writeLog(argc, argv);
146
+ x265_log(encoder->m_param, X265_LOG_WARNING, "x265_encoder_log is now deprecated\n");
147
}
148
}
149
150
-extern "C"
151
void x265_encoder_close(x265_encoder *enc)
152
{
153
if (enc)
154
155
}
156
}
157
158
-extern "C"
159
void x265_cleanup(void)
160
{
161
if (!g_ctuSizeConfigured)
162
163
}
164
}
165
166
-extern "C"
167
x265_picture *x265_picture_alloc()
168
{
169
return (x265_picture*)x265_malloc(sizeof(x265_picture));
170
}
171
172
-extern "C"
173
void x265_picture_init(x265_param *param, x265_picture *pic)
174
{
175
memset(pic, 0, sizeof(x265_picture));
176
177
}
178
}
179
180
-extern "C"
181
void x265_picture_free(x265_picture *p)
182
{
183
return x265_free(p);
184
185
186
static const x265_api libapi =
187
{
188
- &x265_param_alloc,
189
- &x265_param_free,
190
- &x265_param_default,
191
- &x265_param_parse,
192
- &x265_param_apply_profile,
193
- &x265_param_default_preset,
194
+ X265_MAJOR_VERSION,
195
+ X265_BUILD,
196
+ sizeof(x265_param),
197
+ sizeof(x265_picture),
198
+ sizeof(x265_analysis_data),
199
+ sizeof(x265_zone),
200
+ sizeof(x265_stats),
201
x265_1.7.tar.gz/source/encoder/bitcost.cpp -> x265_1.8.tar.gz/source/encoder/bitcost.cpp
Changed
19
1
2
#include "primitives.h"
3
#include "bitcost.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
void BitCost::setQP(unsigned int qp)
9
{
10
11
12
// estimate same cost for negative and positive MVD
13
for (int i = 0; i <= 2 * BC_MAX_MV; i++)
14
- s_costs[qp][i] = s_costs[qp][-i] = (uint16_t)X265_MIN(s_bitsizes[i] * lambda + 0.5f, (1 << 16) - 1);
15
+ s_costs[qp][i] = s_costs[qp][-i] = (uint16_t)X265_MIN(s_bitsizes[i] * lambda + 0.5f, (1 << 15) - 1);
16
}
17
}
18
19
x265_1.7.tar.gz/source/encoder/bitcost.h -> x265_1.8.tar.gz/source/encoder/bitcost.h
Changed
10
1
2
#include "threading.h"
3
#include "mv.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class BitCost
10
x265_1.7.tar.gz/source/encoder/dpb.cpp -> x265_1.8.tar.gz/source/encoder/dpb.cpp
Changed
10
1
2
3
#include "dpb.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
DPB::~DPB()
9
{
10
x265_1.7.tar.gz/source/encoder/dpb.h -> x265_1.8.tar.gz/source/encoder/dpb.h
Changed
10
1
2
3
#include "piclist.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace for x265
8
9
class Frame;
10
x265_1.7.tar.gz/source/encoder/encoder.cpp -> x265_1.8.tar.gz/source/encoder/encoder.cpp
Changed
201
1
2
3
#include "x265.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
8
}
9
10
-static const char* summaryCSVHeader =
11
- "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
12
- "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
13
- "I count, I ave-QP, I kpbs, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB), "
14
- "P count, P ave-QP, P kpbs, P-PSNR Y, P-PSNR U, P-PSNR V, P-SSIM (dB), "
15
- "B count, B ave-QP, B kpbs, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB), "
16
- "Version\n";
17
-
18
static const char* defaultAnalysisFileName = "x265_analysis.dat";
19
20
-using namespace x265;
21
+using namespace X265_NS;
22
23
Encoder::Encoder()
24
{
25
26
m_exportedPic = NULL;
27
m_numDelayedPic = 0;
28
m_outputCount = 0;
29
- m_csvfpt = NULL;
30
m_param = NULL;
31
m_latestParam = NULL;
32
m_cuOffsetY = NULL;
33
34
35
// Do not allow WPP if only one row or fewer than 3 columns, it is pointless and unstable
36
if (rows == 1 || cols < 3)
37
+ {
38
+ x265_log(p, X265_LOG_WARNING, "Too few rows/columns, --wpp disabled\n");
39
p->bEnableWavefront = 0;
40
+ }
41
42
bool allowPools = !p->numaPools || strcmp(p->numaPools, "none");
43
44
45
p->bEnableWavefront = p->bDistributeModeAnalysis = p->bDistributeMotionEstimation = p->lookaheadSlices = 0;
46
}
47
48
+ if (!p->bEnableWavefront && p->rc.vbvBufferSize)
49
+ {
50
+ x265_log(p, X265_LOG_ERROR, "VBV requires wavefront parallelism\n");
51
+ m_aborted = true;
52
+ }
53
+
54
char buf[128];
55
int len = 0;
56
if (p->bEnableWavefront)
57
58
initSPS(&m_sps);
59
initPPS(&m_pps);
60
61
- /* Try to open CSV file handle */
62
- if (m_param->csvfn)
63
- {
64
- m_csvfpt = fopen(m_param->csvfn, "r");
65
- if (m_csvfpt)
66
- {
67
- /* file already exists, re-open for append */
68
- fclose(m_csvfpt);
69
- m_csvfpt = fopen(m_param->csvfn, "ab");
70
- }
71
- else
72
- {
73
- /* new CSV file, write header */
74
- m_csvfpt = fopen(m_param->csvfn, "wb");
75
- if (m_csvfpt)
76
- {
77
- if (m_param->logLevel >= X265_LOG_FRAME)
78
- {
79
- fprintf(m_csvfpt, "Encode Order, Type, POC, QP, Bits, ");
80
- if (m_param->rc.rateControlMode == X265_RC_CRF)
81
- fprintf(m_csvfpt, "RateFactor, ");
82
- fprintf(m_csvfpt, "Y PSNR, U PSNR, V PSNR, YUV PSNR, SSIM, SSIM (dB), List 0, List 1");
83
- /* detailed performance statistics */
84
- fprintf(m_csvfpt, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms), Stall Time (ms), Avg WPP, Row Blocks\n");
85
- }
86
- else
87
- fputs(summaryCSVHeader, m_csvfpt);
88
- }
89
- }
90
-
91
- if (!m_csvfpt)
92
- {
93
- x265_log(m_param, X265_LOG_ERROR, "Unable to open CSV log file <%s>, aborting\n", m_param->csvfn);
94
- m_aborted = true;
95
- }
96
- }
97
-
98
int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
99
int numCols = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
100
for (int i = 0; i < m_param->frameNumThreads; i++)
101
102
103
if (m_analysisFile)
104
fclose(m_analysisFile);
105
- if (m_csvfpt)
106
- fclose(m_csvfpt);
107
108
if (m_param)
109
{
110
111
free((char*)m_param->rc.statFileName);
112
free((char*)m_param->analysisFileName);
113
free((char*)m_param->scalingLists);
114
- free((char*)m_param->csvfn);
115
free((char*)m_param->numaPools);
116
free((char*)m_param->masteringDisplayColorVolume);
117
free((char*)m_param->contentLightLevelInfo);
118
119
- x265_param_free(m_param);
120
+ PARAM_NS::x265_param_free(m_param);
121
}
122
123
- x265_param_free(m_latestParam);
124
+ PARAM_NS::x265_param_free(m_latestParam);
125
}
126
127
void Encoder::updateVbvPlan(RateControl* rc)
128
129
if (outFrame)
130
{
131
Slice *slice = outFrame->m_encData->m_slice;
132
+ x265_frame_stats* frameData = NULL;
133
134
/* Free up pic_in->analysisData since it has already been used */
135
if (m_param->analysisMode == X265_ANALYSIS_LOAD)
136
137
pic_out->bitDepth = X265_DEPTH;
138
pic_out->userData = outFrame->m_userData;
139
pic_out->colorSpace = m_param->internalCsp;
140
+ frameData = &(pic_out->frameData);
141
142
pic_out->pts = outFrame->m_pts;
143
pic_out->dts = outFrame->m_dts;
144
145
if (m_aborted)
146
return -1;
147
148
- finishFrameStats(outFrame, curEncoder, curEncoder->m_accessUnitBits);
149
+ finishFrameStats(outFrame, curEncoder, curEncoder->m_accessUnitBits, frameData);
150
+
151
+ /* Write RateControl Frame level stats in multipass encodes */
152
+ if (m_param->rc.bStatWrite)
153
+ if (m_rateControl->writeRateControlFrameStats(outFrame, &curEncoder->m_rce))
154
+ m_aborted = true;
155
156
/* Allow this frame to be recycled if no frame encoders are using it for reference */
157
if (!pic_out)
158
159
m_aborted = true;
160
}
161
else if (m_encodedFrameNum)
162
- m_rateControl->setFinalFrameCount(m_encodedFrameNum);
163
+ m_rateControl->setFinalFrameCount(m_encodedFrameNum);
164
}
165
while (m_bZeroLatency && ++pass < 2);
166
167
168
m_totalQp += aveQp;
169
}
170
171
-char* Encoder::statsCSVString(EncStats& stat, char* buffer)
172
-{
173
- if (!stat.m_numPics)
174
- {
175
- sprintf(buffer, "-, -, -, -, -, -, -, ");
176
- return buffer;
177
- }
178
-
179
- double fps = (double)m_param->fpsNum / m_param->fpsDenom;
180
- double scale = fps / 1000 / (double)stat.m_numPics;
181
-
182
- int len = sprintf(buffer, "%-6u, ", stat.m_numPics);
183
-
184
- len += sprintf(buffer + len, "%2.2lf, ", stat.m_totalQp / (double)stat.m_numPics);
185
- len += sprintf(buffer + len, "%-8.2lf, ", stat.m_accBits * scale);
186
- if (m_param->bEnablePsnr)
187
- {
188
- len += sprintf(buffer + len, "%.3lf, %.3lf, %.3lf, ",
189
- stat.m_psnrSumY / (double)stat.m_numPics,
190
- stat.m_psnrSumU / (double)stat.m_numPics,
191
- stat.m_psnrSumV / (double)stat.m_numPics);
192
- }
193
- else
194
- len += sprintf(buffer + len, "-, -, -, ");
195
-
196
- if (m_param->bEnableSsim)
197
- sprintf(buffer + len, "%.3lf, ", x265_ssim2dB(stat.m_globalSsim / (double)stat.m_numPics));
198
- else
199
- sprintf(buffer + len, "-, ");
200
- return buffer;
201
x265_1.7.tar.gz/source/encoder/encoder.h -> x265_1.8.tar.gz/source/encoder/encoder.h
Changed
42
1
2
3
struct x265_encoder {};
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
extern const char g_sliceTypeToChar[3];
9
10
11
EncStats m_analyzeI;
12
EncStats m_analyzeP;
13
EncStats m_analyzeB;
14
- FILE* m_csvfpt;
15
int64_t m_encodeStartTime;
16
17
// weighted prediction
18
19
20
void fetchStats(x265_stats* stats, size_t statsSizeBytes);
21
22
- void writeLog(int argc, char **argv);
23
-
24
void printSummary();
25
26
char* statsString(EncStats&, char*);
27
28
- char* statsCSVString(EncStats& stat, char* buffer);
29
-
30
void configure(x265_param *param);
31
32
void updateVbvPlan(RateControl* rc);
33
34
35
void writeAnalysisFile(x265_analysis_data* pic);
36
37
- void finishFrameStats(Frame* pic, FrameEncoder *curEncoder, uint64_t bits);
38
+ void finishFrameStats(Frame* pic, FrameEncoder *curEncoder, uint64_t bits, x265_frame_stats* frameStats);
39
40
protected:
41
42
x265_1.7.tar.gz/source/encoder/entropy.cpp -> x265_1.8.tar.gz/source/encoder/entropy.cpp
Changed
201
1
2
#define CU_DQP_EG_k 0 // exp-golomb order
3
#define START_VALUE 8 // start value for dpcm mode
4
5
-static const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
6
-
7
-namespace x265 {
8
+namespace X265_NS {
9
10
Entropy::Entropy()
11
{
12
13
WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_420chroma_constraint_flag");
14
WRITE_FLAG(csp == X265_CSP_I400, "general_max_monochrome_constraint_flag");
15
WRITE_FLAG(ptl.intraConstraintFlag, "general_intra_constraint_flag");
16
- WRITE_FLAG(0, "general_one_picture_only_constraint_flag");
17
+ WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
18
WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
19
WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
20
WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
21
22
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
23
{
24
X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
25
- PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
26
- uint32_t numPU = (partSize == SIZE_2Nx2N ? 1 : (partSize == SIZE_NxN ? 4 : 2));
27
- uint32_t depth = cu.m_cuDepth[absPartIdx];
28
- uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_unitSizeDepth - depth) * 2) >> 4;
29
+ uint32_t numPU = cu.getNumPartInter(absPartIdx);
30
31
- for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += puOffset)
32
+ for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
33
{
34
codeMergeFlag(cu, subPartIdx);
35
if (cu.m_mergeFlag[subPartIdx])
36
37
encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
38
}
39
40
+#if CHECKED_BUILD || _DEBUG
41
+uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
42
+{
43
+ uint32_t goRiceParam = 0;
44
+ int firstCoeff2 = 1;
45
+ uint32_t baseLevelN = 0x5555AAAA; // 2-bits encode format baseLevel
46
+
47
+ uint32_t sum = 0;
48
+ int idx = 0;
49
+ do
50
+ {
51
+ int baseLevel = (baseLevelN & 3) | firstCoeff2;
52
+ X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
53
+ baseLevelN >>= 2;
54
+ int codeNumber = absCoeff[idx] - baseLevel;
55
+
56
+ if (codeNumber >= 0)
57
+ {
58
+ //writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
59
+ uint32_t length = 0;
60
+
61
+ codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION;
62
+ if (codeNumber >= 0)
63
+ {
64
+ {
65
+ unsigned long cidx;
66
+ CLZ(cidx, codeNumber + 1);
67
+ length = cidx;
68
+ }
69
+ X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
70
+
71
+ codeNumber = (length + length);
72
+ }
73
+ sum += (COEF_REMAIN_BIN_REDUCTION + 1 + goRiceParam + codeNumber);
74
+
75
+ if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << goRiceParam))
76
+ goRiceParam = (goRiceParam + 1) - (goRiceParam >> 2);
77
+ X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
78
+ }
79
+ if (absCoeff[idx] >= 2)
80
+ firstCoeff2 = 0;
81
+ idx++;
82
+ }
83
+ while(idx < numNonZero);
84
+
85
+ return sum;
86
+}
87
+#endif // debug only code
88
+
89
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
90
{
91
uint32_t trSize = 1 << log2TrSize;
92
93
// compute number of significant coefficients
94
uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
95
X265_CHECK(numSig > 0, "cbf check fail\n");
96
- bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !tqBypass;
97
+ bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
98
99
if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
100
codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
101
102
if (codingParameters.scanType == SCAN_VER)
103
std::swap(pos[0], pos[1]);
104
105
- int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
106
- int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
107
+ int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
108
+ int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
109
uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
110
+ X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
111
+ X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
112
113
uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
114
for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
115
116
uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
117
uint32_t c1 = 1;
118
int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
119
- int absCoeff[1 << MLS_CG_SIZE];
120
- int numNonZero = 1;
121
+ ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE)]);
122
+ uint32_t numNonZero = 1;
123
unsigned long lastNZPosInCG;
124
unsigned long firstNZPosInCG;
125
126
- absCoeff[0] = int(abs(coeff[posLast]));
127
+ absCoeff[0] = (uint16_t)abs(coeff[posLast]);
128
129
for (int subSet = lastScanSet; subSet >= 0; subSet--)
130
{
131
132
133
// encode significant_coeffgroup_flag
134
const int cgBlkPos = codingParameters.scanCG[subSet];
135
- const int cgPosY = cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
136
+ const int cgPosY = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
137
const int cgPosX = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
138
const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
139
140
141
}
142
143
// encode significant_coeff_flag
144
- if (sigCoeffGroupFlag64 & cgBlkPosMask)
145
+ if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
146
{
147
X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
148
const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
149
const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
150
151
- static const uint8_t ctxIndMap4x4[16] =
152
- {
153
- 0, 1, 4, 5,
154
- 2, 3, 4, 5,
155
- 6, 6, 8, 8,
156
- 7, 7, 8, 8
157
- };
158
// NOTE: [patternSigCtx][posXinSubset][posYinSubset]
159
- static const uint8_t table_cnt[4][SCAN_SET_SIZE] =
160
+ static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
161
{
162
// patternSigCtx = 0
163
{
164
165
2, 2, 2, 2,
166
2, 2, 2, 2,
167
2, 2, 2, 2,
168
+ },
169
+ // 4x4
170
+ {
171
+ 0, 1, 4, 5,
172
+ 2, 3, 4, 5,
173
+ 6, 6, 8, 8,
174
+ 7, 7, 8, 8
175
}
176
};
177
178
const int offset = codingParameters.firstSignificanceMapContext;
179
- ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
180
- // TODO: accelerate by PABSW
181
const uint32_t blkPosBase = codingParameters.scan[subPosBase];
182
- for (int i = 0; i < MLS_CG_SIZE; i++)
183
- {
184
- tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
185
- tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
186
- tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
187
- tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
188
- }
189
190
+ X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
191
if (m_bitIf)
192
{
193
+ ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
194
+
195
+ // TODO: accelerate by PABSW
196
+ for (int i = 0; i < MLS_CG_SIZE; i++)
197
+ {
198
+ tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
199
+ tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
200
+ tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
201
x265_1.7.tar.gz/source/encoder/entropy.h -> x265_1.8.tar.gz/source/encoder/entropy.h
Changed
10
1
2
#include "contexts.h"
3
#include "slice.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
struct SaoCtuParam;
10
x265_1.7.tar.gz/source/encoder/frameencoder.cpp -> x265_1.8.tar.gz/source/encoder/frameencoder.cpp
Changed
201
1
2
#include "slicetype.h"
3
#include "nal.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param);
8
9
FrameEncoder::FrameEncoder()
10
11
m_cuGeoms = NULL;
12
m_ctuGeomMap = NULL;
13
m_localTldIdx = 0;
14
- memset(&m_frameStats, 0, sizeof(m_frameStats));
15
memset(&m_rce, 0, sizeof(RateControlEntry));
16
}
17
18
19
m_SSDY = m_SSDU = m_SSDV = 0;
20
m_ssim = 0;
21
m_ssimCnt = 0;
22
- memset(&m_frameStats, 0, sizeof(m_frameStats));
23
+ memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats));
24
25
/* Emit access unit delimiter unless this is the first frame and the user is
26
* not repeating headers (since AUD is supposed to be the first NAL in the access
27
28
29
m_top->m_lastBPSEI = m_rce.encodeOrder;
30
}
31
-
32
- // The recovery point SEI message assists a decoder in determining when the decoding
33
- // process will produce acceptable pictures for display after the decoder initiates
34
- // random access. The m_recoveryPocCnt is in units of POC(picture order count) which
35
- // means pictures encoded after the CRA but precede it in display order(leading) are
36
- // implicitly discarded after a random access seek regardless of the value of
37
- // m_recoveryPocCnt. Our encoder does not use references prior to the most recent CRA,
38
- // so all pictures following the CRA in POC order are guaranteed to be displayable,
39
- // so m_recoveryPocCnt is always 0.
40
- SEIRecoveryPoint sei_recovery_point;
41
- sei_recovery_point.m_recoveryPocCnt = 0;
42
- sei_recovery_point.m_exactMatchingFlag = true;
43
- sei_recovery_point.m_brokenLinkFlag = false;
44
-
45
- m_bs.resetBits();
46
- sei_recovery_point.write(m_bs, *slice->m_sps);
47
- m_bs.writeByteAlignment();
48
-
49
- m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
50
}
51
52
if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
53
54
m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
55
}
56
57
+ /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to
58
+ * tune RateControl parameters for other frames.
59
+ * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
60
+ * RateControlEnd here, after the slicecontexts are initialized. For the rest - ABR
61
+ * and VBV, unlock only after rateControlUpdateStats of this frame is called */
62
+ if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
63
+ {
64
+ m_top->m_rateControl->m_startEndOrder.incr();
65
+
66
+ if (m_rce.encodeOrder < m_param->frameNumThreads - 1)
67
+ m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
68
+ }
69
+
70
/* Analyze CTU rows, most of the hard work is done here. Frame is
71
* compressed in a wave-front pattern if WPP is enabled. Row based loop
72
* filters runs behind the CTU compression and reconstruction */
73
74
// accumulate intra,inter,skip cu count per frame for 2 pass
75
for (uint32_t i = 0; i < m_numRows; i++)
76
{
77
- m_frameStats.mvBits += m_rows[i].rowStats.mvBits;
78
- m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
79
- m_frameStats.miscBits += m_rows[i].rowStats.miscBits;
80
- totalI += m_rows[i].rowStats.iCuCnt;
81
- totalP += m_rows[i].rowStats.pCuCnt;
82
- totalSkip += m_rows[i].rowStats.skipCuCnt;
83
+ m_frame->m_encData->m_frameStats.mvBits += m_rows[i].rowStats.mvBits;
84
+ m_frame->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
85
+ m_frame->m_encData->m_frameStats.miscBits += m_rows[i].rowStats.miscBits;
86
+ totalI += m_rows[i].rowStats.intra8x8Cnt;
87
+ totalP += m_rows[i].rowStats.inter8x8Cnt;
88
+ totalSkip += m_rows[i].rowStats.skip8x8Cnt;
89
}
90
int totalCuCount = totalI + totalP + totalSkip;
91
- m_frameStats.percentIntra = (double)totalI / totalCuCount;
92
- m_frameStats.percentInter = (double)totalP / totalCuCount;
93
- m_frameStats.percentSkip = (double)totalSkip / totalCuCount;
94
+ m_frame->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;
95
+ m_frame->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
96
+ m_frame->m_encData->m_frameStats.percent8x8Skip = (double)totalSkip / totalCuCount;
97
+ }
98
+ for (uint32_t i = 0; i < m_numRows; i++)
99
+ {
100
+ m_frame->m_encData->m_frameStats.cntIntraNxN += m_rows[i].rowStats.cntIntraNxN;
101
+ m_frame->m_encData->m_frameStats.totalCu += m_rows[i].rowStats.totalCu;
102
+ m_frame->m_encData->m_frameStats.totalCtu += m_rows[i].rowStats.totalCtu;
103
+ m_frame->m_encData->m_frameStats.lumaDistortion += m_rows[i].rowStats.lumaDistortion;
104
+ m_frame->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;
105
+ m_frame->m_encData->m_frameStats.psyEnergy += m_rows[i].rowStats.psyEnergy;
106
+ m_frame->m_encData->m_frameStats.lumaLevel += m_rows[i].rowStats.lumaLevel;
107
+
108
+ if (m_rows[i].rowStats.maxLumaLevel > m_frame->m_encData->m_frameStats.maxLumaLevel)
109
+ m_frame->m_encData->m_frameStats.maxLumaLevel = m_rows[i].rowStats.maxLumaLevel;
110
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
111
+ {
112
+ m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
113
+ m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
114
+ for (int m = 0; m < INTER_MODES; m++)
115
+ m_frame->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
116
+ for (int n = 0; n < INTRA_MODES; n++)
117
+ m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];
118
+ }
119
+ }
120
+ m_frame->m_encData->m_frameStats.avgLumaDistortion = (double)(m_frame->m_encData->m_frameStats.lumaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;
121
+ m_frame->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame->m_encData->m_frameStats.chromaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;
122
+ m_frame->m_encData->m_frameStats.avgPsyEnergy = (double)(m_frame->m_encData->m_frameStats.psyEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
123
+ m_frame->m_encData->m_frameStats.avgLumaLevel = m_frame->m_encData->m_frameStats.lumaLevel / m_frame->m_encData->m_frameStats.totalCtu;
124
+ m_frame->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;
125
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
126
+ {
127
+ m_frame->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
128
+ m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
129
+ for (int n = 0; n < INTRA_MODES; n++)
130
+ m_frame->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame->m_encData->m_frameStats.totalCu;
131
+ uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
132
+ cuInterRectCnt += m_frame->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame->m_encData->m_frameStats.cuInterDistribution[depth][2];
133
+ m_frame->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame->m_encData->m_frameStats.totalCu;
134
+ m_frame->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame->m_encData->m_frameStats.totalCu;
135
+ m_frame->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame->m_encData->m_frameStats.totalCu;
136
}
137
138
m_bs.resetBits();
139
140
m_endCompressTime = x265_mdate();
141
142
/* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */
143
- if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
144
+ if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce) < 0)
145
m_top->m_aborted = true;
146
147
/* Decrement referenced frame reference counts, allow them to be recycled */
148
149
const uint32_t lineStartCUAddr = row * numCols;
150
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
151
152
- /* These store the count of inter, intra and skip cus within quad tree structure of each CTU */
153
- uint32_t qTreeInterCnt[NUM_CU_DEPTH];
154
- uint32_t qTreeIntraCnt[NUM_CU_DEPTH];
155
- uint32_t qTreeSkipCnt[NUM_CU_DEPTH];
156
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
157
- qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
158
-
159
while (curRow.completed < numCols)
160
{
161
ProfileScopeEvent(encodeCTU);
162
163
// Completed CU processing
164
curRow.completed++;
165
166
- if (m_param->bLogCuStats || m_param->rc.bStatWrite)
167
- curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, qTreeInterCnt, qTreeIntraCnt, qTreeSkipCnt);
168
- else if (m_param->rc.aqMode)
169
- curEncData.m_rowStat[row].sumQpAq += calcCTUQP(*ctu);
170
+ FrameStats frameLog;
171
+ curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
172
173
// copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
174
if (m_param->rc.bStatWrite)
175
{
176
- curRow.rowStats.mvBits += best.mvBits;
177
+ curRow.rowStats.mvBits += best.mvBits;
178
curRow.rowStats.coeffBits += best.coeffBits;
179
- curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
180
+ curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
181
182
for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
183
{
184
/* 1 << shift == number of 8x8 blocks at current depth */
185
int shift = 2 * (g_maxCUDepth - depth);
186
- curRow.rowStats.iCuCnt += qTreeIntraCnt[depth] << shift;
187
- curRow.rowStats.pCuCnt += qTreeInterCnt[depth] << shift;
188
- curRow.rowStats.skipCuCnt += qTreeSkipCnt[depth] << shift;
189
+ int cuSize = g_maxCUSize >> depth;
190
191
- // clear the row cu data from thread local object
192
- qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
193
+ if (cuSize == 8)
194
+ curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
195
+ else
196
+ curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift);
197
+
198
+ curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
199
+ curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
200
}
201
x265_1.7.tar.gz/source/encoder/frameencoder.h -> x265_1.8.tar.gz/source/encoder/frameencoder.h
Changed
38
1
2
#include "reference.h"
3
#include "nal.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class ThreadPool;
10
11
12
#define ANGULAR_MODE_ID 2
13
#define AMP_ID 3
14
-#define INTER_MODES 4
15
-#define INTRA_MODES 3
16
17
struct StatisticLog
18
{
19
20
MD5Context m_state[3];
21
uint32_t m_crc[3];
22
uint32_t m_checksum[3];
23
- StatisticLog m_sliceTypeLog[3]; // per-slice type CU statistics
24
- FrameStats m_frameStats; // stats of current frame for multi-pass encodes
25
26
volatile int m_activeWorkerCount; // count of workers currently encoding or filtering CTUs
27
volatile int m_totalActiveWorkerCount; // sum of m_activeWorkerCount sampled at end of each CTU
28
29
void encodeSlice();
30
31
void threadMain();
32
- int collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt);
33
- int calcCTUQP(const CUData& ctu);
34
+ int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
35
void noiseReductionUpdate();
36
37
/* Called by WaveFront::findJob() */
38
x265_1.7.tar.gz/source/encoder/framefilter.cpp -> x265_1.8.tar.gz/source/encoder/framefilter.cpp
Changed
10
1
2
#include "frameencoder.h"
3
#include "wavefront.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);
9
static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);
10
x265_1.7.tar.gz/source/encoder/framefilter.h -> x265_1.8.tar.gz/source/encoder/framefilter.h
Changed
10
1
2
#include "deblock.h"
3
#include "sao.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class Encoder;
10
x265_1.7.tar.gz/source/encoder/level.cpp -> x265_1.8.tar.gz/source/encoder/level.cpp
Changed
201
1
2
#include "slice.h"
3
#include "level.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
typedef struct
8
{
9
uint32_t maxLumaSamples;
10
11
/* determine minimum decoder level required to decode the described video */
12
void determineLevel(const x265_param &param, VPS& vps)
13
{
14
+ vps.ptl.onePictureOnlyConstraintFlag = param.totalFrames == 1;
15
+ vps.ptl.intraConstraintFlag = param.keyframeMax <= 1 || vps.ptl.onePictureOnlyConstraintFlag;
16
+ vps.ptl.bitDepthConstraint = param.internalBitDepth;
17
+ vps.ptl.chromaFormatConstraint = param.internalCsp;
18
+
19
+ /* TODO: figure out HighThroughput signaling, aka: HbrFactor in section A.4.2, only available
20
+ * for intra-only profiles (vps.ptl.intraConstraintFlag) */
21
+ vps.ptl.lowerBitRateConstraintFlag = true;
22
+
23
vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
24
- if (param.internalCsp == X265_CSP_I420)
25
+
26
+ if (param.internalCsp == X265_CSP_I420 && param.internalBitDepth <= 10)
27
{
28
- if (param.internalBitDepth == 8)
29
+ /* Probably an HEVC v1 profile, but must check to be sure */
30
+ if (param.internalBitDepth <= 8)
31
{
32
- if (param.keyframeMax == 1 && param.maxNumReferences == 1)
33
+ if (vps.ptl.onePictureOnlyConstraintFlag)
34
vps.ptl.profileIdc = Profile::MAINSTILLPICTURE;
35
+ else if (vps.ptl.intraConstraintFlag)
36
+ vps.ptl.profileIdc = Profile::MAINREXT; /* Main Intra */
37
else
38
vps.ptl.profileIdc = Profile::MAIN;
39
}
40
- else if (param.internalBitDepth == 10)
41
- vps.ptl.profileIdc = Profile::MAIN10;
42
+ else if (param.internalBitDepth <= 10)
43
+ {
44
+ /* note there is no 10bit still picture profile */
45
+ if (vps.ptl.intraConstraintFlag)
46
+ vps.ptl.profileIdc = Profile::MAINREXT; /* Main10 Intra */
47
+ else
48
+ vps.ptl.profileIdc = Profile::MAIN10;
49
+ }
50
}
51
else
52
vps.ptl.profileIdc = Profile::MAINREXT;
53
54
return;
55
}
56
57
-#define CHECK_RANGE(value, main, high) (value > main && value <= high)
58
+#define CHECK_RANGE(value, main, high) (high != MAX_UINT && value > main && value <= high)
59
60
- if (CHECK_RANGE(bitrate, levels[i].maxBitrateMain, levels[i].maxBitrateHigh) &&
61
- CHECK_RANGE((uint32_t)param.rc.vbvBufferSize, levels[i].maxCpbSizeMain, levels[i].maxCpbSizeHigh) &&
62
- levels[i].maxBitrateHigh != MAX_UINT)
63
+ if (CHECK_RANGE(bitrate, levels[i].maxBitrateMain, levels[i].maxBitrateHigh) ||
64
+ CHECK_RANGE((uint32_t)param.rc.vbvBufferSize, levels[i].maxCpbSizeMain, levels[i].maxCpbSizeHigh))
65
{
66
- /* If the user has not enabled high tier, continue looking to see if we can encode at a higher level, main tier */
67
- if (!param.bHighTier && (levels[i].levelIdc < param.levelIdc))
68
- continue;
69
- else
70
+ /* The bitrate or buffer size are out of range for Main tier, but in
71
+ * range for High tier. If the user requested High tier then give
72
+ * them High tier at this level. Otherwise allow the loop to
73
+ * progress to the Main tier of the next level */
74
+ if (param.bHighTier)
75
vps.ptl.tierFlag = Level::HIGH;
76
+ else
77
+ continue;
78
}
79
else
80
vps.ptl.tierFlag = Level::MAIN;
81
82
break;
83
}
84
85
- vps.ptl.intraConstraintFlag = false;
86
- vps.ptl.lowerBitRateConstraintFlag = true;
87
- vps.ptl.bitDepthConstraint = param.internalBitDepth;
88
- vps.ptl.chromaFormatConstraint = param.internalCsp;
89
-
90
static const char *profiles[] = { "None", "Main", "Main 10", "Main Still Picture", "RExt" };
91
static const char *tiers[] = { "Main", "High" };
92
93
- const char *profile = profiles[vps.ptl.profileIdc];
94
+ char profbuf[64];
95
+ strcpy(profbuf, profiles[vps.ptl.profileIdc]);
96
+
97
+ bool bStillPicture = false;
98
if (vps.ptl.profileIdc == Profile::MAINREXT)
99
{
100
- if (param.internalCsp == X265_CSP_I422)
101
- profile = "Main 4:2:2 10";
102
- if (param.internalCsp == X265_CSP_I444)
103
+ if (vps.ptl.bitDepthConstraint > 12 && vps.ptl.intraConstraintFlag)
104
+ {
105
+ if (vps.ptl.onePictureOnlyConstraintFlag)
106
+ {
107
+ strcpy(profbuf, "Main 4:4:4 16 Still Picture");
108
+ bStillPicture = true;
109
+ }
110
+ else
111
+ strcpy(profbuf, "Main 4:4:4 16");
112
+ }
113
+ else if (param.internalCsp == X265_CSP_I420)
114
+ {
115
+ X265_CHECK(vps.ptl.intraConstraintFlag || vps.ptl.bitDepthConstraint > 10, "rext fail\n");
116
+ if (vps.ptl.bitDepthConstraint <= 8)
117
+ strcpy(profbuf, "Main");
118
+ else if (vps.ptl.bitDepthConstraint <= 10)
119
+ strcpy(profbuf, "Main 10");
120
+ else if (vps.ptl.bitDepthConstraint <= 12)
121
+ strcpy(profbuf, "Main 12");
122
+ }
123
+ else if (param.internalCsp == X265_CSP_I422)
124
+ {
125
+ /* there is no Main 4:2:2 profile, so it must be signaled as Main10 4:2:2 */
126
+ if (param.internalBitDepth <= 10)
127
+ strcpy(profbuf, "Main 4:2:2 10");
128
+ else if (vps.ptl.bitDepthConstraint <= 12)
129
+ strcpy(profbuf, "Main 4:2:2 12");
130
+ }
131
+ else if (param.internalCsp == X265_CSP_I444)
132
{
133
if (vps.ptl.bitDepthConstraint <= 8)
134
- profile = "Main 4:4:4 8";
135
+ {
136
+ if (vps.ptl.onePictureOnlyConstraintFlag)
137
+ {
138
+ strcpy(profbuf, "Main 4:4:4 Still Picture");
139
+ bStillPicture = true;
140
+ }
141
+ else
142
+ strcpy(profbuf, "Main 4:4:4");
143
+ }
144
else if (vps.ptl.bitDepthConstraint <= 10)
145
- profile = "Main 4:4:4 10";
146
+ strcpy(profbuf, "Main 4:4:4 10");
147
+ else if (vps.ptl.bitDepthConstraint <= 12)
148
+ strcpy(profbuf, "Main 4:4:4 12");
149
}
150
+ else
151
+ strcpy(profbuf, "Unknown");
152
+
153
+ if (vps.ptl.intraConstraintFlag && !bStillPicture)
154
+ strcat(profbuf, " Intra");
155
}
156
x265_log(&param, X265_LOG_INFO, "%s profile, Level-%s (%s tier)\n",
157
- profile, levels[i].name, tiers[vps.ptl.tierFlag]);
158
+ profbuf, levels[i].name, tiers[vps.ptl.tierFlag]);
159
}
160
161
/* enforce a maximum decoder level requirement, in other words assure that a
162
163
164
return true;
165
}
166
+}
167
+
168
+#if EXPORT_C_API
169
+
170
+/* these functions are exported as C functions (default) */
171
+using namespace X265_NS;
172
+extern "C" {
173
+
174
+#else
175
+
176
+/* these functions exist within private namespace (multilib) */
177
+namespace X265_NS {
178
+
179
+#endif
180
181
-extern "C"
182
int x265_param_apply_profile(x265_param *param, const char *profile)
183
{
184
if (!param || !profile)
185
return 0;
186
187
-#if HIGH_BIT_DEPTH
188
- if (!strcmp(profile, "main") || !strcmp(profile, "mainstillpicture") || !strcmp(profile, "msp") || !strcmp(profile, "main444-8"))
189
- {
190
- x265_log(param, X265_LOG_ERROR, "%s profile not supported, compiled for Main10.\n", profile);
191
- return -1;
192
- }
193
-#else
194
- if (!strcmp(profile, "main10") || !strcmp(profile, "main422-10") || !strcmp(profile, "main444-10"))
195
- {
196
- x265_log(param, X265_LOG_ERROR, "%s profile not supported, compiled for Main.\n", profile);
197
- return -1;
198
- }
199
+ /* Check if profile bit-depth requirement is exceeded by internal bit depth */
200
+ bool bInvalidDepth = false;
201
x265_1.7.tar.gz/source/encoder/level.h -> x265_1.8.tar.gz/source/encoder/level.h
Changed
10
1
2
#include "common.h"
3
#include "x265.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// encoder private namespace
8
9
struct VPS;
10
x265_1.7.tar.gz/source/encoder/motion.cpp -> x265_1.8.tar.gz/source/encoder/motion.cpp
Changed
125
1
2
#pragma warning(disable: 4127) // conditional expression is constant (macros use this construct)
3
#endif
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
namespace {
9
10
11
{ 2, 8, 2, 8, true }, // 2x8 SATD HPEL + 2x8 SATD QPEL
12
};
13
14
-int sizeScale[NUM_PU_SIZES];
15
+static int sizeScale[NUM_PU_SIZES];
16
#define SAD_THRESH(v) (bcost < (((v >> 4) * sizeScale[partEnum])))
17
18
/* radius 2 hexagon. repeated entries are to avoid having to compute mod6 every time. */
19
20
pix_base + (m1x) + (m1y) * stride, \
21
pix_base + (m2x) + (m2y) * stride, \
22
stride, costs); \
23
- const uint16_t *base_mvx = &m_cost_mvx[(bmv.x + (m0x)) << 2]; \
24
- const uint16_t *base_mvy = &m_cost_mvy[(bmv.y + (m0y)) << 2]; \
25
- X265_CHECK(mvcost((bmv + MV(m0x, m0y)) << 2) == (base_mvx[((m0x) - (m0x)) << 2] + base_mvy[((m0y) - (m0y)) << 2]), "mvcost() check failure\n"); \
26
- X265_CHECK(mvcost((bmv + MV(m1x, m1y)) << 2) == (base_mvx[((m1x) - (m0x)) << 2] + base_mvy[((m1y) - (m0y)) << 2]), "mvcost() check failure\n"); \
27
- X265_CHECK(mvcost((bmv + MV(m2x, m2y)) << 2) == (base_mvx[((m2x) - (m0x)) << 2] + base_mvy[((m2y) - (m0y)) << 2]), "mvcost() check failure\n"); \
28
- (costs)[0] += (base_mvx[((m0x) - (m0x)) << 2] + base_mvy[((m0y) - (m0y)) << 2]); \
29
- (costs)[1] += (base_mvx[((m1x) - (m0x)) << 2] + base_mvy[((m1y) - (m0y)) << 2]); \
30
- (costs)[2] += (base_mvx[((m2x) - (m0x)) << 2] + base_mvy[((m2y) - (m0y)) << 2]); \
31
+ (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
32
+ (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
33
+ (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
34
}
35
36
#define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
37
38
pix_base + (m2x) + (m2y) * stride, \
39
pix_base + (m3x) + (m3y) * stride, \
40
stride, costs); \
41
- const uint16_t *base_mvx = &m_cost_mvx[(omv.x << 2)]; \
42
- const uint16_t *base_mvy = &m_cost_mvy[(omv.y << 2)]; \
43
- X265_CHECK(mvcost((omv + MV(m0x, m0y)) << 2) == (base_mvx[(m0x) << 2] + base_mvy[(m0y) << 2]), "mvcost() check failure\n"); \
44
- X265_CHECK(mvcost((omv + MV(m1x, m1y)) << 2) == (base_mvx[(m1x) << 2] + base_mvy[(m1y) << 2]), "mvcost() check failure\n"); \
45
- X265_CHECK(mvcost((omv + MV(m2x, m2y)) << 2) == (base_mvx[(m2x) << 2] + base_mvy[(m2y) << 2]), "mvcost() check failure\n"); \
46
- X265_CHECK(mvcost((omv + MV(m3x, m3y)) << 2) == (base_mvx[(m3x) << 2] + base_mvy[(m3y) << 2]), "mvcost() check failure\n"); \
47
- costs[0] += (base_mvx[(m0x) << 2] + base_mvy[(m0y) << 2]); \
48
- costs[1] += (base_mvx[(m1x) << 2] + base_mvy[(m1y) << 2]); \
49
- costs[2] += (base_mvx[(m2x) << 2] + base_mvy[(m2y) << 2]); \
50
- costs[3] += (base_mvx[(m3x) << 2] + base_mvy[(m3y) << 2]); \
51
+ costs[0] += mvcost((omv + MV(m0x, m0y)) << 2); \
52
+ costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \
53
+ costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
54
+ costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
55
COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
56
COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
57
COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
58
59
pix_base + (m2x) + (m2y) * stride, \
60
pix_base + (m3x) + (m3y) * stride, \
61
stride, costs); \
62
- /* TODO: use restrict keyword in ICL */ \
63
- const uint16_t *base_mvx = &m_cost_mvx[(bmv.x << 2)]; \
64
- const uint16_t *base_mvy = &m_cost_mvy[(bmv.y << 2)]; \
65
- X265_CHECK(mvcost((bmv + MV(m0x, m0y)) << 2) == (base_mvx[(m0x) << 2] + base_mvy[(m0y) << 2]), "mvcost() check failure\n"); \
66
- X265_CHECK(mvcost((bmv + MV(m1x, m1y)) << 2) == (base_mvx[(m1x) << 2] + base_mvy[(m1y) << 2]), "mvcost() check failure\n"); \
67
- X265_CHECK(mvcost((bmv + MV(m2x, m2y)) << 2) == (base_mvx[(m2x) << 2] + base_mvy[(m2y) << 2]), "mvcost() check failure\n"); \
68
- X265_CHECK(mvcost((bmv + MV(m3x, m3y)) << 2) == (base_mvx[(m3x) << 2] + base_mvy[(m3y) << 2]), "mvcost() check failure\n"); \
69
- (costs)[0] += (base_mvx[(m0x) << 2] + base_mvy[(m0y) << 2]); \
70
- (costs)[1] += (base_mvx[(m1x) << 2] + base_mvy[(m1y) << 2]); \
71
- (costs)[2] += (base_mvx[(m2x) << 2] + base_mvy[(m2y) << 2]); \
72
- (costs)[3] += (base_mvx[(m3x) << 2] + base_mvy[(m3y) << 2]); \
73
+ (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
74
+ (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
75
+ (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
76
+ (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
77
}
78
79
#define DIA1_ITER(mx, my) \
80
81
}
82
}
83
84
+ X265_CHECK(!(ref->isLowres && numCandidates), "lowres motion candidates not allowed\n")
85
// measure SAD cost at each QPEL motion vector candidate
86
- if (ref->isLowres)
87
- {
88
- for (int i = 0; i < numCandidates; i++)
89
- {
90
- MV m = mvc[i].clipped(qmvmin, qmvmax);
91
- if (m.notZero() && m != pmv && m != bestpre) // check already measured
92
- {
93
- int cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
94
- if (cost < bprecost)
95
- {
96
- bprecost = cost;
97
- bestpre = m;
98
- }
99
- }
100
- }
101
- }
102
- else
103
+ for (int i = 0; i < numCandidates; i++)
104
{
105
- for (int i = 0; i < numCandidates; i++)
106
+ MV m = mvc[i].clipped(qmvmin, qmvmax);
107
+ if (m.notZero() & (m != pmv ? 1 : 0) & (m != bestpre ? 1 : 0)) // check already measured
108
{
109
- MV m = mvc[i].clipped(qmvmin, qmvmax);
110
- if (m.notZero() && m != pmv && m != bestpre) // check already measured
111
+ int cost = subpelCompare(ref, m, sad) + mvcost(m);
112
+ if (cost < bprecost)
113
{
114
- int cost = subpelCompare(ref, m, sad) + mvcost(m);
115
- if (cost < bprecost)
116
- {
117
- bprecost = cost;
118
- bestpre = m;
119
- }
120
+ bprecost = cost;
121
+ bestpre = m;
122
}
123
}
124
}
125
x265_1.7.tar.gz/source/encoder/motion.h -> x265_1.8.tar.gz/source/encoder/motion.h
Changed
10
1
2
#include "bitcost.h"
3
#include "yuv.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class MotionEstimate : public BitCost
10
x265_1.7.tar.gz/source/encoder/nal.cpp -> x265_1.8.tar.gz/source/encoder/nal.cpp
Changed
10
1
2
#include "bitstream.h"
3
#include "nal.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
NALList::NALList()
9
: m_numNal(0)
10
x265_1.7.tar.gz/source/encoder/nal.h -> x265_1.8.tar.gz/source/encoder/nal.h
Changed
10
1
2
#include "common.h"
3
#include "x265.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class Bitstream;
10
x265_1.7.tar.gz/source/encoder/ratecontrol.cpp -> x265_1.8.tar.gz/source/encoder/ratecontrol.cpp
Changed
201
1
2
#define BR_SHIFT 6
3
#define CPB_SHIFT 4
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
/* Amortize the partial cost of I frames over the next N frames */
9
10
11
m_bTerminated = false;
12
m_finalFrameCount = 0;
13
m_numEntries = 0;
14
+ m_isSceneTransition = false;
15
+ m_lastPredictorReset = 0;
16
if (m_param->rc.rateControlMode == X265_RC_CRF)
17
{
18
m_param->rc.qp = (int)m_param->rc.rfConstant;
19
20
if(m_param->rc.bStrictCbr)
21
m_rateTolerance = 0.7;
22
23
- m_leadingBframes = m_param->bframes;
24
m_bframeBits = 0;
25
m_leadingNoBSatd = 0;
26
m_ipOffset = 6.0 * X265_LOG2(m_param->rc.ipFactor);
27
28
/* Adjust the first frame in order to stabilize the quality level compared to the rest */
29
#define ABR_INIT_QP_MIN (24)
30
#define ABR_INIT_QP_MAX (40)
31
+#define ABR_SCENECUT_INIT_QP_MIN (12)
32
#define CRF_INIT_QP (int)m_param->rc.rfConstant
33
for (int i = 0; i < 3; i++)
34
m_lastQScaleFor[i] = x265_qp2qScale(m_param->rc.rateControlMode == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN);
35
36
m_accumPNorm = .01;
37
m_accumPQp = (m_param->rc.rateControlMode == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN) * m_accumPNorm;
38
39
- /* Frame Predictors and Row predictors used in vbv */
40
- for (int i = 0; i < 4; i++)
41
- {
42
- m_pred[i].coeff = 1.0;
43
- m_pred[i].count = 1.0;
44
- m_pred[i].decay = 0.5;
45
- m_pred[i].offset = 0.0;
46
- }
47
- m_pred[0].coeff = m_pred[3].coeff = 0.75;
48
- if (m_param->rc.qCompress >= 0.8) // when tuned for grain
49
- {
50
- m_pred[1].coeff = 0.75;
51
- m_pred[0].coeff = m_pred[3].coeff = 0.50;
52
- }
53
+ /* Frame Predictors used in vbv */
54
+ initFramePredictors();
55
if (!m_statFileOut && (m_param->rc.bStatWrite || m_param->rc.bStatRead))
56
{
57
/* If the user hasn't defined the stat filename, use the default value */
58
59
return X265_TYPE_AUTO;
60
}
61
62
+void RateControl::initFramePredictors()
63
+{
64
+ /* Frame Predictors used in vbv */
65
+ for (int i = 0; i < 4; i++)
66
+ {
67
+ m_pred[i].coeff = 1.0;
68
+ m_pred[i].count = 1.0;
69
+ m_pred[i].decay = 0.5;
70
+ m_pred[i].offset = 0.0;
71
+ }
72
+ m_pred[0].coeff = m_pred[3].coeff = 0.75;
73
+ if (m_param->rc.qCompress >= 0.8) // when tuned for grain
74
+ {
75
+ m_pred[1].coeff = 0.75;
76
+ m_pred[0].coeff = m_pred[3].coeff = 0.50;
77
+ }
78
+}
79
+
80
int RateControl::rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc)
81
{
82
int orderValue = m_startEndOrder.get();
83
84
copyRceData(rce, &m_rce2Pass[rce->poc]);
85
}
86
rce->isActive = true;
87
- if (m_sliceType == B_SLICE)
88
- rce->bframes = m_leadingBframes;
89
- else
90
- m_leadingBframes = curFrame->m_lowres.leadingBframes;
91
+ bool isRefFrameScenecut = m_sliceType!= I_SLICE && m_curSlice->m_refPicList[0][0]->m_lowres.bScenecut == 1;
92
+ if (curFrame->m_lowres.bScenecut)
93
+ {
94
+ m_isSceneTransition = true;
95
+ m_lastPredictorReset = rce->encodeOrder;
96
+ initFramePredictors();
97
+ }
98
+ else if (m_sliceType != B_SLICE && !isRefFrameScenecut)
99
+ m_isSceneTransition = false;
100
+
101
+ if (rce->encodeOrder < m_lastPredictorReset + m_param->frameNumThreads)
102
+ {
103
+ rce->rowPreds[0][0].count = 0;
104
+ }
105
106
rce->bLastMiniGopBFrame = curFrame->m_lowres.bLastMiniGopBFrame;
107
rce->bufferRate = m_bufferRate;
108
109
}
110
}
111
}
112
+ /* For a scenecut that occurs within the mini-gop, enable scene transition
113
+ * switch until the next mini-gop to ensure a min qp for all the frames within
114
+ * the scene-transition mini-gop */
115
+
116
double q = x265_qScale2qp(rateEstimateQscale(curFrame, rce));
117
q = x265_clip3((double)QP_MIN, (double)QP_MAX_MAX, q);
118
m_qp = int(q + 0.5);
119
120
}
121
m_framesDone++;
122
123
- /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to
124
- * tune RateControl parameters for other frames.
125
- * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
126
- * RateControlEnd here.those modes here. For the rest - ABR
127
- * and VBV, unlock only after rateControlUpdateStats of this frame is called */
128
- if (m_param->rc.rateControlMode != X265_RC_ABR && !m_isVbv)
129
- {
130
- m_startEndOrder.incr();
131
-
132
- if (rce->encodeOrder < m_param->frameNumThreads - 1)
133
- m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
134
- }
135
return m_qp;
136
}
137
138
139
else
140
q += m_pbOffset;
141
142
+ /* Set a min qp at scenechanges and transitions */
143
+ if (m_isSceneTransition)
144
+ {
145
+ q = X265_MAX(ABR_SCENECUT_INIT_QP_MIN, q);
146
+ double minScenecutQscale =x265_qp2qScale(ABR_SCENECUT_INIT_QP_MIN);
147
+ m_lastQScaleFor[P_SLICE] = X265_MAX(minScenecutQscale, m_lastQScaleFor[P_SLICE]);
148
+ }
149
double qScale = x265_qp2qScale(q);
150
rce->qpNoVbv = q;
151
double lmin = 0, lmax = 0;
152
153
q = X265_MIN(lqmax, q);
154
}
155
q = x265_clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
156
+ /* Set a min qp at scenechanges and transitions */
157
+ if (m_isSceneTransition)
158
+ {
159
+ double minScenecutQscale =x265_qp2qScale(ABR_SCENECUT_INIT_QP_MIN);
160
+ q = X265_MAX(minScenecutQscale, q);
161
+ m_lastQScaleFor[P_SLICE] = X265_MAX(minScenecutQscale, m_lastQScaleFor[P_SLICE]);
162
+ }
163
rce->qpNoVbv = x265_qScale2qp(q);
164
q = clipQscale(curFrame, rce, q);
165
/* clip qp to permissible range after vbv-lookahead estimation to avoid possible
166
- * mispredictions by initial frame size predictors */
167
- if (!m_2pass && m_isVbv && m_pred[m_predType].count == 1)
168
+ * mispredictions by initial frame size predictors, after each scenecut */
169
+ bool isFrameAfterScenecut = m_sliceType!= I_SLICE && m_curSlice->m_refPicList[0][0]->m_lowres.bScenecut;
170
+ if (!m_2pass && m_isVbv && isFrameAfterScenecut)
171
q = x265_clip3(lqmin, lqmax, q);
172
}
173
m_lastQScaleFor[m_sliceType] = q;
174
175
}
176
/* Try to get the buffer not more than 80% filled, but don't set an impossible goal. */
177
targetFill = x265_clip3(m_bufferSize * (1 - 0.2 * finalDur), m_bufferSize, m_bufferFill - totalDuration * m_vbvMaxRate * 0.5);
178
- if (m_isCbr && bufferFillCur > targetFill)
179
+ if (m_isCbr && bufferFillCur > targetFill && !m_isSceneTransition)
180
{
181
q /= 1.01;
182
loopTerminate |= 2;
183
184
else if (picType == P_SLICE)
185
{
186
intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv - curEncData.m_rowStat[row].diagIntraSatd;
187
+ intraCostForPendingCus >>= X265_DEPTH - 8;
188
/* Our QP is lower than the reference! */
189
double pred_intra = predictSize(rce->rowPred[1], qScale, intraCostForPendingCus);
190
/* Sum: better to overestimate than underestimate by using only one of the two predictors. */
191
192
uint64_t intraRowSatdCost = curEncData.m_rowStat[row].diagIntraSatd;
193
if (row == 1)
194
intraRowSatdCost += curEncData.m_rowStat[0].diagIntraSatd;
195
-
196
+ intraRowSatdCost >>= X265_DEPTH - 8;
197
updatePredictor(rce->rowPred[1], qScaleVbv, (double)intraRowSatdCost, encodedBits);
198
}
199
}
200
201
x265_1.7.tar.gz/source/encoder/ratecontrol.h -> x265_1.8.tar.gz/source/encoder/ratecontrol.h
Changed
74
1
2
#include "common.h"
3
#include "sei.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// encoder namespace
8
9
class Encoder;
10
11
#define MIN_AMORTIZE_FRACTION 0.2
12
#define CLIP_DURATION(f) x265_clip3(MIN_FRAME_DURATION, MAX_FRAME_DURATION, f)
13
14
-/* Current frame stats for 2 pass */
15
-struct FrameStats
16
-{
17
- int mvBits; /* MV bits (MV+Ref+Block Type) */
18
- int coeffBits; /* Texture bits (DCT coefs) */
19
- int miscBits;
20
-
21
- int iCuCnt;
22
- int pCuCnt;
23
- int skipCuCnt;
24
-
25
- /* CU type counts stored as percentage */
26
- double percentIntra;
27
- double percentInter;
28
- double percentSkip;
29
-};
30
-
31
struct Predictor
32
{
33
double coeff;
34
35
double m_pbOffset;
36
int64_t m_bframeBits;
37
int64_t m_currentSatd;
38
- int m_leadingBframes;
39
int m_qpConstant[3];
40
int m_lastNonBPictType;
41
int m_framesDone; /* # of frames passed through RateControl already */
42
43
int64_t m_lastBsliceSatdCost;
44
int m_numBframesInPattern;
45
bool m_isPatternPresent;
46
+ bool m_isSceneTransition;
47
+ int m_lastPredictorReset;
48
49
/* a common variable on which rateControlStart, rateControlEnd and rateControlUpdateStats wait to
50
* sync the calls to these functions. For example
51
52
// to be called for each curFrame to process RateControl and set QP
53
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);
54
void rateControlUpdateStats(RateControlEntry* rce);
55
- int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce, FrameStats* stats);
56
+ int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);
57
int rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
58
int rateControlSliceType(int frameNum);
59
bool cuTreeReadFor2Pass(Frame* curFrame);
60
void hrdFullness(SEIBufferingPeriod* sei);
61
-
62
+ int writeRateControlFrameStats(Frame* curFrame, RateControlEntry* rce);
63
protected:
64
65
static const int s_slidingWindowFrames;
66
67
void checkAndResetABR(RateControlEntry* rce, bool isFrameDone);
68
double predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpm, int32_t& encodedBits);
69
bool initPass2();
70
+ void initFramePredictors();
71
double getDiffLimitedQScale(RateControlEntry *rce, double q);
72
double countExpectedBits();
73
bool vbv2Pass(uint64_t allAvailableBits);
74
x265_1.7.tar.gz/source/encoder/rdcost.h -> x265_1.8.tar.gz/source/encoder/rdcost.h
Changed
66
1
2
#include "common.h"
3
#include "slice.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class RDCost
10
11
m_lambda = (uint64_t)floor(256.0 * lambda);
12
}
13
14
- inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits) const
15
+ inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const
16
{
17
+#if X265_DEPTH <= 10
18
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
19
- "calcRdCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", distortion, bits, m_lambda2);
20
+ "calcRdCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",
21
+ distortion, bits, m_lambda2);
22
+#else
23
+ X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
24
+ "calcRdCost wrap detected dist: " X265_LL ", bits %u, lambda: " X265_LL "\n",
25
+ distortion, bits, m_lambda2);
26
+#endif
27
return distortion + ((bits * m_lambda2 + 128) >> 8);
28
}
29
30
31
}
32
33
/* return the RD cost of this prediction, including the effect of psy-rd */
34
- inline uint64_t calcPsyRdCost(uint32_t distortion, uint32_t bits, uint32_t psycost) const
35
+ inline uint64_t calcPsyRdCost(sse_ret_t distortion, uint32_t bits, uint32_t psycost) const
36
{
37
return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
38
}
39
40
inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const
41
{
42
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,
43
- "calcRdSADCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", sadCost, bits, m_lambda);
44
+ "calcRdSADCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n", sadCost, bits, m_lambda);
45
return sadCost + ((bits * m_lambda + 128) >> 8);
46
}
47
48
- inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const
49
+ inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const
50
{
51
+#if X265_DEPTH <= 10
52
+ X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
53
+ "scaleChromaDist wrap detected dist: %u, lambda: %u\n",
54
+ dist, m_chromaDistWeight[plane - 1]);
55
+#else
56
X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
57
- "scaleChromaDist wrap detected dist: %u, lambda: %u\n", dist, m_chromaDistWeight[plane - 1]);
58
- return (uint32_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
59
+ "scaleChromaDist wrap detected dist: " X265_LL " lambda: %u\n",
60
+ dist, m_chromaDistWeight[plane - 1]);
61
+#endif
62
+ return (sse_ret_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
63
}
64
65
inline uint32_t getCost(uint32_t bits) const
66
x265_1.7.tar.gz/source/encoder/reference.cpp -> x265_1.8.tar.gz/source/encoder/reference.cpp
Changed
10
1
2
3
#include "reference.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
MotionReference::MotionReference()
9
{
10
x265_1.7.tar.gz/source/encoder/reference.h -> x265_1.8.tar.gz/source/encoder/reference.h
Changed
10
1
2
#include "lowres.h"
3
#include "mv.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
struct WeightParam;
10
x265_1.7.tar.gz/source/encoder/sao.cpp -> x265_1.8.tar.gz/source/encoder/sao.cpp
Changed
201
1
2
return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));
3
}
4
5
+inline int signOf2(const int a, const int b)
6
+{
7
+ // NOTE: don't reorder the comparisons below; the code generated by ICL, VC and GCC depends strongly on their order!
8
+ int r = 0;
9
+ if (a < b)
10
+ r = -1;
11
+ if (a > b)
12
+ r = 1;
13
+ return r;
14
+}
15
+
16
inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
17
{
18
return (count * offset - offsetOrg * 2) * offset;
19
}
20
-
21
} // end anonymous namespace
22
23
24
-namespace x265 {
25
+namespace X265_NS {
26
27
const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
28
{
29
30
frame->m_encData->m_saoParam = saoParam;
31
}
32
33
- rdoSaoUnitRowInit(saoParam);
34
+ saoParam->bSaoFlag[0] = true;
35
+ saoParam->bSaoFlag[1] = true;
36
37
- // NOTE: Disable SAO automatic turn-off when frame parallelism is
38
- // enabled for output exact independent of frame thread count
39
- if (m_param->frameNumThreads > 1)
40
+ m_numNoSao[0] = 0; // Luma
41
+ m_numNoSao[1] = 0; // Chroma
42
+
43
+ // NOTE: Allow SAO automatic turn-off only when frame parallelism is disabled.
44
+ if (m_param->frameNumThreads == 1)
45
{
46
- saoParam->bSaoFlag[0] = true;
47
- saoParam->bSaoFlag[1] = true;
48
+ if (m_refDepth > 0 && m_depthSaoRate[0][m_refDepth - 1] > SAO_ENCODING_RATE)
49
+ saoParam->bSaoFlag[0] = false;
50
+ if (m_refDepth > 0 && m_depthSaoRate[1][m_refDepth - 1] > SAO_ENCODING_RATE_CHROMA)
51
+ saoParam->bSaoFlag[1] = false;
52
}
53
}
54
55
56
/* Calculate SAO statistics for current CTU without non-crossing slice */
57
void SAO::calcSaoStatsCu(int addr, int plane)
58
{
59
- int x, y;
60
const CUData* cu = m_frame->m_encData->getPicCTU(addr);
61
const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
62
const pixel* rec0 = m_frame->m_reconPic->getPlaneAddr(plane, addr);
63
64
int startY;
65
int endX;
66
int endY;
67
- int32_t* stats;
68
- int32_t* count;
69
70
int skipB = plane ? 2 : 4;
71
int skipR = plane ? 3 : 5;
72
73
74
// SAO_BO:
75
{
76
- const int boShift = X265_DEPTH - SAO_BO_BITS;
77
-
78
if (m_param->bSaoNonDeblocked)
79
{
80
skipB = plane ? 1 : 3;
81
skipR = plane ? 2 : 4;
82
}
83
- stats = m_offsetOrg[plane][SAO_BO];
84
- count = m_count[plane][SAO_BO];
85
-
86
- fenc = fenc0;
87
- rec = rec0;
88
89
endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
90
endY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
91
92
- for (y = 0; y < endY; y++)
93
- {
94
- for (x = 0; x < endX; x++)
95
- {
96
- int classIdx = 1 + (rec[x] >> boShift);
97
- stats[classIdx] += (fenc[x] - rec[x]);
98
- count[classIdx]++;
99
- }
100
-
101
- fenc += stride;
102
- rec += stride;
103
- }
104
+ primitives.saoCuStatsBO(fenc0, rec0, stride, endX, endY, m_offsetOrg[plane][SAO_BO], m_count[plane][SAO_BO]);
105
}
106
107
{
108
109
skipB = plane ? 1 : 3;
110
skipR = plane ? 3 : 5;
111
}
112
- stats = m_offsetOrg[plane][SAO_EO_0];
113
- count = m_count[plane][SAO_EO_0];
114
-
115
- fenc = fenc0;
116
- rec = rec0;
117
118
startX = !lpelx;
119
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
120
- for (y = 0; y < ctuHeight - skipB; y++)
121
- {
122
- int signLeft = signOf(rec[startX] - rec[startX - 1]);
123
- for (x = startX; x < endX; x++)
124
- {
125
- int signRight = signOf(rec[x] - rec[x + 1]);
126
- int edgeType = signRight + signLeft + 2;
127
- signLeft = -signRight;
128
-
129
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
130
- count[s_eoTable[edgeType]]++;
131
- }
132
133
- fenc += stride;
134
- rec += stride;
135
- }
136
+ primitives.saoCuStatsE0(fenc0 + startX, rec0 + startX, stride, endX - startX, ctuHeight - skipB, m_offsetOrg[plane][SAO_EO_0], m_count[plane][SAO_EO_0]);
137
}
138
139
// SAO_EO_1: // dir: |
140
141
skipB = plane ? 2 : 4;
142
skipR = plane ? 2 : 4;
143
}
144
- stats = m_offsetOrg[plane][SAO_EO_1];
145
- count = m_count[plane][SAO_EO_1];
146
147
fenc = fenc0;
148
rec = rec0;
149
150
151
primitives.sign(upBuff1, rec, &rec[- stride], ctuWidth);
152
153
- for (y = startY; y < endY; y++)
154
- {
155
- for (x = 0; x < endX; x++)
156
- {
157
- int8_t signDown = signOf(rec[x] - rec[x + stride]);
158
- int edgeType = signDown + upBuff1[x] + 2;
159
- upBuff1[x] = -signDown;
160
-
161
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
162
- count[s_eoTable[edgeType]]++;
163
- }
164
-
165
- fenc += stride;
166
- rec += stride;
167
- }
168
+ primitives.saoCuStatsE1(fenc0 + startY * stride, rec0 + startY * stride, stride, upBuff1, endX, endY - startY, m_offsetOrg[plane][SAO_EO_1], m_count[plane][SAO_EO_1]);
169
}
170
171
// SAO_EO_2: // dir: 135
172
173
skipB = plane ? 2 : 4;
174
skipR = plane ? 3 : 5;
175
}
176
- stats = m_offsetOrg[plane][SAO_EO_2];
177
- count = m_count[plane][SAO_EO_2];
178
179
fenc = fenc0;
180
rec = rec0;
181
182
183
primitives.sign(&upBuff1[startX], &rec[startX], &rec[startX - stride - 1], (endX - startX));
184
185
- for (y = startY; y < endY; y++)
186
- {
187
- upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
188
- for (x = startX; x < endX; x++)
189
- {
190
- int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
191
- int edgeType = signDown + upBuff1[x] + 2;
192
- upBufft[x + 1] = -signDown;
193
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
194
- count[s_eoTable[edgeType]]++;
195
- }
196
-
197
- std::swap(upBuff1, upBufft);
198
-
199
- rec += stride;
200
- fenc += stride;
201
x265_1.7.tar.gz/source/encoder/sao.h -> x265_1.8.tar.gz/source/encoder/sao.h
Changed
42
1
2
#include "frame.h"
3
#include "entropy.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
enum SAOTypeLen
10
11
12
class SAO
13
{
14
-protected:
15
+public:
16
17
enum { SAO_MAX_DEPTH = 4 };
18
enum { SAO_BO_BITS = 5 };
19
enum { MAX_NUM_SAO_CLASS = 33 };
20
- enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };
21
+ enum { SAO_BIT_INC = 0 }; /* in HM12.0, it wrote as X265_MAX(X265_DEPTH - 10, 0) */
22
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
23
enum { NUM_EDGETYPE = 5 };
24
enum { NUM_PLANE = 3 };
25
26
typedef int32_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
27
typedef int32_t (PerPlane[NUM_PLANE][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
28
29
+protected:
30
+
31
/* allocated per part */
32
PerClass* m_count;
33
PerClass* m_offset;
34
35
int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
36
inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
37
38
- void rdoSaoUnitRowInit(SAOParam* saoParam);
39
void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
40
void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
41
};
42
x265_1.7.tar.gz/source/encoder/search.cpp -> x265_1.8.tar.gz/source/encoder/search.cpp
Changed
201
1
2
#include "analysis.h" // TLD
3
#include "framedata.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#if _MSC_VER
9
#pragma warning(disable: 4800) // 'uint8_t' : forcing value to bool 'true' or 'false' (performance warning)
10
11
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
12
if (numSig)
13
{
14
- m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
15
+ m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
16
primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
17
}
18
else
19
20
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
21
if (numSig)
22
{
23
- m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
24
+ m_quant.invtransformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
25
primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
26
}
27
else if (useTSkip)
28
29
// no residual coded, recon = pred
30
primitives.cu[sizeIdx].copy_pp(tmpRecon, tmpReconStride, pred, stride);
31
32
- uint32_t tmpDist = primitives.cu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
33
+ sse_ret_t tmpDist = primitives.cu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
34
35
cu.setTransformSkipSubParts(useTSkip, TEXT_LUMA, absPartIdx, fullDepth);
36
cu.setCbfSubParts((!!numSig) << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
37
38
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
39
if (numSig)
40
{
41
- m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
42
+ m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
43
primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
44
cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
45
}
46
47
uint32_t qtLayer = log2TrSize - 2;
48
uint32_t stride = mode.fencYuv->m_csize;
49
const uint32_t sizeIdxC = log2TrSizeC - 2;
50
- uint32_t outDist = 0;
51
+ sse_ret_t outDist = 0;
52
53
uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
54
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
55
56
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
57
if (numSig)
58
{
59
- m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
60
+ m_quant.invtransformNxN(cu, residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
61
primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
62
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
63
}
64
65
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
66
if (numSig)
67
{
68
- m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
69
+ m_quant.invtransformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
70
primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
71
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
72
}
73
74
primitives.cu[sizeIdxC].copy_pp(recon, reconStride, pred, stride);
75
cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
76
}
77
- uint32_t tmpDist = primitives.cu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
78
+ sse_ret_t tmpDist = primitives.cu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
79
tmpDist = m_rdCost.scaleChromaDist(chromaId, tmpDist);
80
81
cu.setTransformSkipPartRange(useTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
82
83
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
84
if (numSig)
85
{
86
- m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
87
+ m_quant.invtransformNxN(cu, residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
88
primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
89
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
90
}
91
92
93
cu.setPartSizeSubParts(partSize);
94
cu.setPredModeSubParts(MODE_INTRA);
95
- m_quant.m_tqBypass = !!cu.m_tqBypass[0];
96
97
uint32_t tuDepthRange[2];
98
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
99
100
intraMode.initCosts();
101
- intraMode.distortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
102
- intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
103
+ intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
104
+ intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
105
+ intraMode.distortion += intraMode.lumaDistortion + intraMode.chromaDistortion;
106
107
m_entropyCoder.resetBits();
108
if (m_slice->m_pps->bTransquantBypassEnabled)
109
110
codeIntraLumaQT(intraMode, cuGeom, 0, 0, false, icosts, tuDepthRange);
111
extractIntraResultQT(cu, *reconYuv, 0, 0);
112
113
- intraMode.distortion = icosts.distortion;
114
- intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom, NULL);
115
+ intraMode.lumaDistortion = icosts.distortion;
116
+ intraMode.chromaDistortion = estIntraPredChromaQT(intraMode, cuGeom, NULL);
117
+ intraMode.distortion = intraMode.lumaDistortion + intraMode.chromaDistortion;
118
119
m_entropyCoder.resetBits();
120
if (m_slice->m_pps->bTransquantBypassEnabled)
121
122
return outCost;
123
}
124
125
+/* find the lowres motion vector from lookahead in middle of current PU */
126
+MV Search::getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref)
127
+{
128
+ int diffPoc = abs(m_slice->m_poc - m_slice->m_refPicList[list][ref]->m_poc);
129
+ if (diffPoc > m_param->bframes + 1)
130
+ /* poc difference is out of range for lookahead */
131
+ return 0;
132
+
133
+ MV* mvs = m_frame->m_lowres.lowresMvs[list][diffPoc - 1];
134
+ if (mvs[0].x == 0x7FFF)
135
+ /* this motion search was not estimated by lookahead */
136
+ return 0;
137
+
138
+ uint32_t block_x = (cu.m_cuPelX + g_zscanToPelX[pu.puAbsPartIdx] + pu.width / 2) >> 4;
139
+ uint32_t block_y = (cu.m_cuPelY + g_zscanToPelY[pu.puAbsPartIdx] + pu.height / 2) >> 4;
140
+ uint32_t idx = block_y * m_frame->m_lowres.maxBlocksInRow + block_x;
141
+
142
+ X265_CHECK(block_x < m_frame->m_lowres.maxBlocksInRow, "block_x is too high\n");
143
+ X265_CHECK(block_y < m_frame->m_lowres.maxBlocksInCol, "block_y is too high\n");
144
+
145
+ return mvs[idx] << 1; /* scale up lowres mv */
146
+}
147
+
148
/* Pick between the two AMVP candidates which is the best one to use as
149
* MVP for the motion search, based on SAD cost */
150
int Search::selectMVP(const CUData& cu, const PredictionUnit& pu, const MV amvp[AMVP_NUM_CANDS], int list, int ref)
151
152
/* Perform ME, repeat until no more work is available */
153
do
154
{
155
- if (meId < m_slice->m_numRefIdx[0])
156
- slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 0, meId);
157
+ if (meId < pme.m_jobs.refCnt[0])
158
+ {
159
+ int refIdx = pme.m_jobs.ref[0][meId]; //L0
160
+ slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 0, refIdx);
161
+ }
162
else
163
- slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 1, meId - m_slice->m_numRefIdx[0]);
164
+ {
165
+ int refIdx = pme.m_jobs.ref[1][meId - pme.m_jobs.refCnt[0]]; //L1
166
+ slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 1, refIdx);
167
+ }
168
169
meId = -1;
170
pme.m_lock.acquire();
171
172
173
MotionData* bestME = interMode.bestME[part];
174
175
- MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
176
+ // 12 mv candidates including lowresMV
177
+ MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
178
int numMvc = interMode.cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
179
180
const MV* amvp = interMode.amvpCand[list][ref];
181
int mvpIdx = selectMVP(interMode.cu, pu, amvp, list, ref);
182
MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx];
183
184
+ MV lmv = getLowresMV(interMode.cu, pu, list, ref);
185
+ if (lmv.notZero())
186
+ mvc[numMvc++] = lmv;
187
+
188
setSearchRange(interMode.cu, mvp, m_param->searchRange, mvmin, mvmax);
189
190
int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv);
191
192
}
193
194
/* find the best inter prediction for each PU of specified mode */
195
-void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC)
196
+void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
197
{
198
ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
199
200
CUData& cu = interMode.cu;
201
x265_1.7.tar.gz/source/encoder/search.h -> x265_1.8.tar.gz/source/encoder/search.h
Changed
148
1
2
#define ProfileCounter(cu, count)
3
#endif
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class Entropy;
10
11
uint64_t sa8dCost; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
12
uint32_t sa8dBits; // signal bits used in sa8dCost calculation
13
uint32_t psyEnergy; // sum of partition psycho-visual energy difference
14
- uint32_t distortion; // sum of partition SSE distortion
15
+ sse_ret_t lumaDistortion;
16
+ sse_ret_t chromaDistortion;
17
+ sse_ret_t distortion; // sum of partition SSE distortion
18
uint32_t totalBits; // sum of partition bits (mv + coeff)
19
uint32_t mvBits; // Mv bits + Ref + block type (or intra mode)
20
uint32_t coeffBits; // Texture bits (DCT Coeffs)
21
22
sa8dCost = 0;
23
sa8dBits = 0;
24
psyEnergy = 0;
25
+ lumaDistortion = 0;
26
+ chromaDistortion = 0;
27
distortion = 0;
28
totalBits = 0;
29
mvBits = 0;
30
31
sa8dCost = UINT64_MAX / 2;
32
sa8dBits = MAX_UINT / 2;
33
psyEnergy = MAX_UINT / 2;
34
+#if X265_DEPTH <= 10
35
+ lumaDistortion = MAX_UINT / 2;
36
+ chromaDistortion = MAX_UINT / 2;
37
distortion = MAX_UINT / 2;
38
+#else
39
+ lumaDistortion = UINT64_MAX / 2;
40
+ chromaDistortion = UINT64_MAX / 2;
41
+ distortion = UINT64_MAX / 2;
42
+#endif
43
totalBits = MAX_UINT / 2;
44
mvBits = MAX_UINT / 2;
45
coeffBits = MAX_UINT / 2;
46
47
48
bool ok() const
49
{
50
+#if X265_DEPTH <= 10
51
+ return !(rdCost >= UINT64_MAX / 2 ||
52
+ sa8dCost >= UINT64_MAX / 2 ||
53
+ sa8dBits >= MAX_UINT / 2 ||
54
+ psyEnergy >= MAX_UINT / 2 ||
55
+ lumaDistortion >= MAX_UINT / 2 ||
56
+ chromaDistortion >= MAX_UINT / 2 ||
57
+ distortion >= MAX_UINT / 2 ||
58
+ totalBits >= MAX_UINT / 2 ||
59
+ mvBits >= MAX_UINT / 2 ||
60
+ coeffBits >= MAX_UINT / 2);
61
+#else
62
return !(rdCost >= UINT64_MAX / 2 ||
63
sa8dCost >= UINT64_MAX / 2 ||
64
sa8dBits >= MAX_UINT / 2 ||
65
psyEnergy >= MAX_UINT / 2 ||
66
- distortion >= MAX_UINT / 2 ||
67
+ lumaDistortion >= UINT64_MAX / 2 ||
68
+ chromaDistortion >= UINT64_MAX / 2 ||
69
+ distortion >= UINT64_MAX / 2 ||
70
totalBits >= MAX_UINT / 2 ||
71
mvBits >= MAX_UINT / 2 ||
72
coeffBits >= MAX_UINT / 2);
73
+#endif
74
}
75
76
void addSubCosts(const Mode& subMode)
77
78
sa8dCost += subMode.sa8dCost;
79
sa8dBits += subMode.sa8dBits;
80
psyEnergy += subMode.psyEnergy;
81
+ lumaDistortion += subMode.lumaDistortion;
82
+ chromaDistortion += subMode.chromaDistortion;
83
distortion += subMode.distortion;
84
totalBits += subMode.totalBits;
85
mvBits += subMode.mvBits;
86
87
int64_t weightAnalyzeTime; // elapsed worker time analyzing reference weights
88
int64_t totalCTUTime; // elapsed worker time in compressCTU (includes pmode master)
89
90
+ uint32_t skippedMotionReferences[NUM_CU_DEPTH];
91
+ uint32_t totalMotionReferences[NUM_CU_DEPTH];
92
+ uint32_t skippedIntraCU[NUM_CU_DEPTH];
93
+ uint32_t totalIntraCU[NUM_CU_DEPTH];
94
+
95
uint64_t countIntraRDO[NUM_CU_DEPTH];
96
uint64_t countInterRDO[NUM_CU_DEPTH];
97
uint64_t countIntraAnalysis;
98
99
interRDOElapsedTime[i] += other.interRDOElapsedTime[i];
100
countIntraRDO[i] += other.countIntraRDO[i];
101
countInterRDO[i] += other.countInterRDO[i];
102
+ skippedMotionReferences[i] += other.skippedMotionReferences[i];
103
+ totalMotionReferences[i] += other.totalMotionReferences[i];
104
+ skippedIntraCU[i] += other.skippedIntraCU[i];
105
+ totalIntraCU[i] += other.totalIntraCU[i];
106
}
107
108
intraAnalysisElapsedTime += other.intraAnalysisElapsedTime;
109
110
void encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
111
112
// estimation inter prediction (non-skip)
113
- void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC);
114
+ void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
115
116
// encode residual and compute rd-cost for inter mode
117
void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
118
119
void checkDQP(Mode& mode, const CUGeom& cuGeom);
120
void checkDQPForSplitPred(Mode& mode, const CUGeom& cuGeom);
121
122
+ MV getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref);
123
+
124
class PME : public BondedTaskGroup
125
{
126
public:
127
128
const PredictionUnit& pu;
129
int puIdx;
130
131
+ struct {
132
+ int ref[2][MAX_NUM_REF];
133
+ int refCnt[2];
134
+ } m_jobs;
135
+
136
PME(Search& s, Mode& m, const CUGeom& g, const PredictionUnit& u, int p) : master(s), mode(m), cuGeom(g), pu(u), puIdx(p) {}
137
138
void processTasks(int workerThreadId);
139
140
{
141
uint64_t rdcost;
142
uint32_t bits;
143
- uint32_t distortion;
144
+ sse_ret_t distortion;
145
uint32_t energy;
146
Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
147
};
148
x265_1.7.tar.gz/source/encoder/sei.cpp -> x265_1.8.tar.gz/source/encoder/sei.cpp
Changed
10
1
2
#include "slice.h"
3
#include "sei.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
/* x265's identifying GUID */
9
const uint8_t SEIuserDataUnregistered::m_uuid_iso_iec_11578[16] = {
10
x265_1.7.tar.gz/source/encoder/sei.h -> x265_1.8.tar.gz/source/encoder/sei.h
Changed
10
1
2
#include "bitstream.h"
3
#include "slice.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
class SEI : public SyntaxElementWriter
10
x265_1.7.tar.gz/source/encoder/slicetype.cpp -> x265_1.8.tar.gz/source/encoder/slicetype.cpp
Changed
201
1
2
#define ProfileLookaheadTime(elapsed, count)
3
#endif
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
namespace {
9
10
11
/* Actual adaptive quantization */
12
int maxCol = curFrame->m_fencPic->m_picWidth;
13
int maxRow = curFrame->m_fencPic->m_picHeight;
14
- int blockWidth = ((param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
15
- int blockHeight = ((param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
16
- int blockCount = blockWidth * blockHeight;
17
+ int blockCount = curFrame->m_lowres.maxBlocksInRow * curFrame->m_lowres.maxBlocksInCol;
18
19
for (int y = 0; y < 3; y++)
20
{
21
22
{
23
blockXY = 0;
24
double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
25
- if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
26
+ double bias_strength = 0.f;
27
+ if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
28
{
29
- double bit_depth_correction = pow(1 << (X265_DEPTH - 8), 0.5);
30
+ double bit_depth_correction = 1.f / (1 << (2*(X265_DEPTH-8)));
31
for (blockY = 0; blockY < maxRow; blockY += 16)
32
{
33
for (blockX = 0; blockX < maxCol; blockX += 16)
34
{
35
uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
36
- qp_adj = pow(energy + 1, 0.1);
37
+ qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
38
curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
39
avg_adj += qp_adj;
40
avg_adj_pow2 += qp_adj * qp_adj;
41
42
43
avg_adj /= blockCount;
44
avg_adj_pow2 /= blockCount;
45
- strength = param->rc.aqStrength * avg_adj / bit_depth_correction;
46
- avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (11.f * bit_depth_correction)) / avg_adj;
47
+ strength = param->rc.aqStrength * avg_adj;
48
+ avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (11.f)) / avg_adj;
49
+ bias_strength = param->rc.aqStrength;
50
}
51
else
52
strength = param->rc.aqStrength * 1.0397f;
53
54
{
55
for (blockX = 0; blockX < maxCol; blockX += 16)
56
{
57
- if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
58
+ if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
59
+ {
60
+ qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
61
+ qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 11.f / (qp_adj * qp_adj));
62
+ }
63
+ else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
64
{
65
qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
66
qp_adj = strength * (qp_adj - avg_adj);
67
68
m_pool = pool;
69
70
m_lastNonB = NULL;
71
+ m_isSceneTransition = false;
72
m_scratch = NULL;
73
m_tld = NULL;
74
m_filled = false;
75
76
77
int numBFrames = 0;
78
int numAnalyzed = numFrames;
79
- if (m_param->scenecutThreshold && scenecut(frames, 0, 1, true, origNumFrames, maxSearch))
80
+ bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
81
+ /* When scenecut threshold is set, use scenecut detection for I frame placements */
82
+ if (m_param->scenecutThreshold && isScenecut)
83
{
84
frames[1]->sliceType = X265_TYPE_I;
85
return;
86
87
/* Check scenecut on the first minigop. */
88
for (int j = 1; j < numBFrames + 1; j++)
89
{
90
- if (m_param->scenecutThreshold && scenecut(frames, j, j + 1, false, origNumFrames, maxSearch))
91
+ if (scenecut(frames, j, j + 1, false, origNumFrames))
92
{
93
frames[j]->sliceType = X265_TYPE_P;
94
numAnalyzed = j;
95
break;
96
}
97
}
98
-
99
resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1);
100
}
101
else
102
103
if (bIsVbvLookahead)
104
vbvLookahead(frames, numFrames, bKeyframe);
105
106
+ int maxp1 = X265_MIN(m_param->bframes + 1, origNumFrames);
107
/* Restore frame types for all frames that haven't actually been decided yet. */
108
for (int j = resetStart; j <= numFrames; j++)
109
+ {
110
frames[j]->sliceType = X265_TYPE_AUTO;
111
+ /* If any frame marked as scenecut is being restarted for sliceDecision,
112
+ * undo scene Transition flag */
113
+ if (j <= maxp1 && frames[j]->bScenecut && m_isSceneTransition)
114
+ m_isSceneTransition = false;
115
+ }
116
}
117
118
-bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames, int maxSearch)
119
+bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames)
120
{
121
/* Only do analysis during a normal scenecut check. */
122
if (bRealScenecut && m_param->bframes)
123
{
124
int origmaxp1 = p0 + 1;
125
/* Look ahead to avoid coding short flashes as scenecuts. */
126
- if (m_param->bFrameAdaptive == X265_B_ADAPT_TRELLIS)
127
- /* Don't analyse any more frames than the trellis would have covered. */
128
- origmaxp1 += m_param->bframes;
129
- else
130
- origmaxp1++;
131
+ origmaxp1 += m_param->bframes;
132
int maxp1 = X265_MIN(origmaxp1, numFrames);
133
-
134
+ bool fluctuate = false;
135
+ bool noScenecuts = false;
136
+ int64_t avgSatdCost = 0;
137
+ if (frames[0]->costEst[1][0] > -1)
138
+ avgSatdCost = frames[0]->costEst[1][0];
139
+ int cnt = 1;
140
/* Where A and B are scenes: AAAAAABBBAAAAAA
141
* If BBB is shorter than (maxp1-p0), it is detected as a flash
142
* and not considered a scenecut. */
143
for (int cp1 = p1; cp1 <= maxp1; cp1++)
144
{
145
if (!scenecutInternal(frames, p0, cp1, false))
146
+ {
147
/* Any frame in between p0 and cur_p1 cannot be a real scenecut. */
148
for (int i = cp1; i > p0; i--)
149
+ {
150
frames[i]->bScenecut = false;
151
+ noScenecuts = false;
152
+ }
153
+ }
154
+ else if (scenecutInternal(frames, cp1 - 1, cp1, false))
155
+ {
156
+ /* If current frame is a Scenecut from p0 frame as well as Scenecut from
157
+ * preceeding frame, mark it as a Scenecut */
158
+ frames[cp1]->bScenecut = true;
159
+ noScenecuts = true;
160
+ }
161
+
162
+ /* compute average satdcost of all the frames in the mini-gop to confirm
163
+ * whether there is any great fluctuation among them to rule out false positives */
164
+ X265_CHECK(frames[cp1]->costEst[cp1 - p0][0]!= -1, "costEst is not done \n");
165
+ avgSatdCost += frames[cp1]->costEst[cp1 - p0][0];
166
+ cnt++;
167
}
168
169
- /* Where A-F are scenes: AAAAABBCCDDEEFFFFFF
170
- * If each of BB ... EE are shorter than (maxp1-p0), they are
171
- * detected as flashes and not considered scenecuts.
172
- * Instead, the first F frame becomes a scenecut.
173
- * If the video ends before F, no frame becomes a scenecut. */
174
- for (int cp0 = p0; cp0 <= maxp1; cp0++)
175
+ /* Identify possible scene fluctuations by comparing the satd cost of the frames.
176
+ * This could denote the beginning or ending of scene transitions.
177
+ * During a scene transition(fade in/fade outs), if fluctuate remains false,
178
+ * then the scene had completed its transition or stabilized */
179
+ if (noScenecuts)
180
{
181
- if (origmaxp1 > maxSearch || (cp0 < maxp1 && scenecutInternal(frames, cp0, maxp1, false)))
182
- /* If cur_p0 is the p0 of a scenecut, it cannot be the p1 of a scenecut. */
183
- frames[cp0]->bScenecut = false;
184
+ fluctuate = false;
185
+ avgSatdCost /= cnt;
186
+ for (int i = p1; i <= maxp1; i++)
187
+ {
188
+ int64_t curCost = frames[i]->costEst[i - p0][0];
189
+ int64_t prevCost = frames[i - 1]->costEst[i - 1 - p0][0];
190
+ if (fabs((double)(curCost - avgSatdCost)) > 0.1 * avgSatdCost ||
191
+ fabs((double)(curCost - prevCost)) > 0.1 * prevCost)
192
+ {
193
+ fluctuate = true;
194
+ if (!m_isSceneTransition && frames[i]->bScenecut)
195
+ {
196
+ m_isSceneTransition = true;
197
+ /* just mark the first scenechange in the scene transition as a scenecut. */
198
+ for (int j = i + 1; j <= maxp1; j++)
199
+ frames[j]->bScenecut = false;
200
+ break;
201
x265_1.7.tar.gz/source/encoder/slicetype.h -> x265_1.8.tar.gz/source/encoder/slicetype.h
Changed
28
1
2
#include "piclist.h"
3
#include "threadpool.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private namespace
8
9
struct Lowres;
10
11
int m_numCoopSlices;
12
int m_numRowsPerSlice;
13
bool m_filled;
14
-
15
+ bool m_isSceneTransition;
16
Lookahead(x265_param *param, ThreadPool *pool);
17
18
#if DETAILED_CU_STATS
19
20
void slicetypeAnalyse(Lowres **frames, bool bKeyframe);
21
22
/* called by slicetypeAnalyse() to make slice decisions */
23
- bool scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames, int maxSearch);
24
+ bool scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames);
25
bool scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut);
26
void slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
27
int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold);
28
x265_1.7.tar.gz/source/encoder/weightPrediction.cpp -> x265_1.8.tar.gz/source/encoder/weightPrediction.cpp
Changed
19
1
2
#include "mv.h"
3
#include "bitstream.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
namespace {
8
struct Cache
9
{
10
11
}
12
}
13
14
-namespace x265 {
15
+namespace X265_NS {
16
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param)
17
{
18
WeightParam wp[2][MAX_NUM_REF][3];
19
x265_1.7.tar.gz/source/input/input.cpp -> x265_1.8.tar.gz/source/input/input.cpp
Changed
10
1
2
#include "yuv.h"
3
#include "y4m.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
InputFile* InputFile::open(InputFileInfo& info, bool bForceY4m)
9
{
10
x265_1.7.tar.gz/source/input/input.h -> x265_1.8.tar.gz/source/input/input.h
Changed
24
1
2
#define MIN_FRAME_RATE 1
3
#define MAX_FRAME_RATE 300
4
5
-#include "x265.h"
6
+#include "common.h"
7
8
-namespace x265 {
9
+namespace X265_NS {
10
// private x265 namespace
11
12
struct InputFileInfo
13
14
virtual bool isFail() = 0;
15
16
virtual const char *getName() const = 0;
17
+
18
+ virtual int getWidth() const = 0;
19
+
20
+ virtual int getHeight() const = 0;
21
};
22
}
23
24
x265_1.7.tar.gz/source/input/y4m.cpp -> x265_1.8.tar.gz/source/input/y4m.cpp
Changed
10
1
2
#endif
3
#endif
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
using namespace std;
8
9
static const char header[] = "FRAME";
10
x265_1.7.tar.gz/source/input/y4m.h -> x265_1.8.tar.gz/source/input/y4m.h
Changed
21
1
2
3
#define QUEUE_SIZE 5
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// x265 private namespace
8
9
class Y4MInput : public InputFile, public Thread
10
11
bool readPicture(x265_picture&);
12
13
const char *getName() const { return "y4m"; }
14
+
15
+ int getWidth() const { return width; }
16
+
17
+ int getHeight() const { return height; }
18
};
19
}
20
21
x265_1.7.tar.gz/source/input/yuv.cpp -> x265_1.8.tar.gz/source/input/yuv.cpp
Changed
10
1
2
#endif
3
#endif
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
using namespace std;
8
9
YUVInput::YUVInput(InputFileInfo& info)
10
x265_1.7.tar.gz/source/input/yuv.h -> x265_1.8.tar.gz/source/input/yuv.h
Changed
21
1
2
3
#define QUEUE_SIZE 5
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class YUVInput : public InputFile, public Thread
10
11
bool readPicture(x265_picture&);
12
13
const char *getName() const { return "yuv"; }
14
+
15
+ int getWidth() const { return width; }
16
+
17
+ int getHeight() const { return height; }
18
};
19
}
20
21
x265_1.7.tar.gz/source/output/output.cpp -> x265_1.8.tar.gz/source/output/output.cpp
Changed
10
1
2
3
#include "raw.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
ReconFile* ReconFile::open(const char *fname, int width, int height, uint32_t bitdepth, uint32_t fpsNum, uint32_t fpsDenom, int csp)
9
{
10
x265_1.7.tar.gz/source/output/output.h -> x265_1.8.tar.gz/source/output/output.h
Changed
10
1
2
#include "x265.h"
3
#include "input/input.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class ReconFile
10
x265_1.7.tar.gz/source/output/raw.cpp -> x265_1.8.tar.gz/source/output/raw.cpp
Changed
10
1
2
3
#include "raw.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
using namespace std;
8
9
RAWOutput::RAWOutput(const char* fname, InputFileInfo&)
10
x265_1.7.tar.gz/source/output/raw.h -> x265_1.8.tar.gz/source/output/raw.h
Changed
10
1
2
#include <fstream>
3
#include <iostream>
4
5
-namespace x265 {
6
+namespace X265_NS {
7
class RAWOutput : public OutputFile
8
{
9
protected:
10
x265_1.7.tar.gz/source/output/reconplay.cpp -> x265_1.8.tar.gz/source/output/reconplay.cpp
Changed
10
1
2
3
#include <signal.h>
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
#if _WIN32
9
#define popen _popen
10
x265_1.7.tar.gz/source/output/reconplay.h -> x265_1.8.tar.gz/source/output/reconplay.h
Changed
10
1
2
#include "threading.h"
3
#include <cstdio>
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class ReconPlay : public Thread
10
x265_1.7.tar.gz/source/output/y4m.cpp -> x265_1.8.tar.gz/source/output/y4m.cpp
Changed
10
1
2
#include "output.h"
3
#include "y4m.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
using namespace std;
8
9
Y4MOutput::Y4MOutput(const char *filename, int w, int h, uint32_t fpsNum, uint32_t fpsDenom, int csp)
10
x265_1.7.tar.gz/source/output/y4m.h -> x265_1.8.tar.gz/source/output/y4m.h
Changed
10
1
2
#include "output.h"
3
#include <fstream>
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class Y4MOutput : public ReconFile
10
x265_1.7.tar.gz/source/output/yuv.cpp -> x265_1.8.tar.gz/source/output/yuv.cpp
Changed
10
1
2
#include "output.h"
3
#include "yuv.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
using namespace std;
8
9
YUVOutput::YUVOutput(const char *filename, int w, int h, uint32_t d, int csp)
10
x265_1.7.tar.gz/source/output/yuv.h -> x265_1.8.tar.gz/source/output/yuv.h
Changed
10
1
2
3
#include <fstream>
4
5
-namespace x265 {
6
+namespace X265_NS {
7
// private x265 namespace
8
9
class YUVOutput : public ReconFile
10
x265_1.7.tar.gz/source/profile/vtune/vtune.cpp -> x265_1.8.tar.gz/source/profile/vtune/vtune.cpp
Changed
10
1
2
3
}
4
5
-namespace x265 {
6
+namespace X265_NS {
7
8
__itt_domain* domain;
9
__itt_string_handle* taskHandle[NUM_VTUNE_TASKS];
10
x265_1.7.tar.gz/source/profile/vtune/vtune.h -> x265_1.8.tar.gz/source/profile/vtune/vtune.h
Changed
10
1
2
3
#include "ittnotify.h"
4
5
-namespace x265 {
6
+namespace X265_NS {
7
8
#define CPU_EVENT(x) x,
9
enum VTuneTasksEnum
10
x265_1.7.tar.gz/source/test/CMakeLists.txt -> x265_1.8.tar.gz/source/test/CMakeLists.txt
Changed
17
1
2
+# vim: syntax=cmake
3
enable_language(ASM_YASM)
4
5
if(MSVC_IDE)
6
7
intrapredharness.cpp intrapredharness.h)
8
target_link_libraries(TestBench x265-static ${PLATFORM_LIBS})
9
if(LINKER_OPTIONS)
10
- set_target_properties(TestBench PROPERTIES LINK_FLAGS ${LINKER_OPTIONS})
11
+ if(EXTRA_LIB)
12
+ list(APPEND LINKER_OPTIONS "-L..")
13
+ endif(EXTRA_LIB)
14
+ string(REPLACE ";" " " LINKER_OPTION_STR "${LINKER_OPTIONS}")
15
+ set_target_properties(TestBench PROPERTIES LINK_FLAGS "${LINKER_OPTION_STR}")
16
endif()
17
x265_1.7.tar.gz/source/test/checkasm-a.asm -> x265_1.8.tar.gz/source/test/checkasm-a.asm
Changed
29
1
2
3
jz .ok
4
mov r9, rax
5
+ mov r10, rdx
6
lea r0, [error_message]
7
call puts
8
mov r1, [rsp+max_args*8]
9
mov dword [r1], 0
10
+ mov rdx, r10
11
mov rax, r9
12
.ok:
13
RET
14
15
or r3, r5
16
jz .ok
17
mov r3, eax
18
+ mov r4, edx
19
lea r1, [error_message]
20
push r1
21
call puts
22
add esp, 4
23
mov r1, r1m
24
mov dword [r1], 0
25
+ mov edx, r4
26
mov eax, r3
27
.ok:
28
REP_RET
29
x265_1.7.tar.gz/source/test/intrapredharness.cpp -> x265_1.8.tar.gz/source/test/intrapredharness.cpp
Changed
79
1
2
#include "predict.h"
3
#include "intrapredharness.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
IntraPredHarness::IntraPredHarness()
9
{
10
for (int i = 0; i < INPUT_SIZE; i++)
11
pixel_buff[i] = rand() % PIXEL_MAX;
12
+
13
+ /* [0] --- Random values
14
+ * [1] --- Minimum
15
+ * [2] --- Maximum */
16
+ for (int i = 0; i < BUFFSIZE; i++)
17
+ {
18
+ pixel_test_buff[0][i] = rand() % PIXEL_MAX;
19
+ pixel_test_buff[1][i] = PIXEL_MIN;
20
+ pixel_test_buff[2][i] = PIXEL_MAX;
21
+ }
22
}
23
24
bool IntraPredHarness::check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width)
25
26
return true;
27
}
28
29
+bool IntraPredHarness::check_intra_filter_primitive(const intra_filter_t ref, const intra_filter_t opt)
30
+{
31
+ memset(pixel_out_c, 0, 64 * 64 * sizeof(pixel));
32
+ memset(pixel_out_vec, 0, 64 * 64 * sizeof(pixel));
33
+ int j = 0;
34
+
35
+ for (int i = 0; i < 100; i++)
36
+ {
37
+ int index = rand() % TEST_CASES;
38
+
39
+ ref(pixel_test_buff[index] + j, pixel_out_c);
40
+ checked(opt, pixel_test_buff[index] + j, pixel_out_vec);
41
+
42
+ if (memcmp(pixel_out_c, pixel_out_vec, 64 * 64 * sizeof(pixel)))
43
+ return false;
44
+
45
+ reportfail();
46
+ j += FENC_STRIDE;
47
+ }
48
+ return true;
49
+}
50
bool IntraPredHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
51
{
52
for (int i = BLOCK_4x4; i <= BLOCK_32x32; i++)
53
54
return false;
55
}
56
}
57
+ if (opt.cu[i].intra_filter)
58
+ {
59
+ if (!check_intra_filter_primitive(ref.cu[i].intra_filter, opt.cu[i].intra_filter))
60
+ {
61
+ printf("intra_filter_%dx%d failed\n", size, size);
62
+ return false;
63
+ }
64
+ }
65
}
66
67
return true;
68
69
pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, mode, bFilter);
70
}
71
}
72
+ if (opt.cu[i].intra_filter)
73
+ {
74
+ printf("intra_filter_%dx%d", size, size);
75
+ REPORT_SPEEDUP(opt.cu[i].intra_filter, ref.cu[i].intra_filter, pixel_buff, pixel_out_c);
76
+ }
77
}
78
}
79
x265_1.7.tar.gz/source/test/intrapredharness.h -> x265_1.8.tar.gz/source/test/intrapredharness.h
Changed
25
1
2
enum { INPUT_SIZE = 4 * 65 * 65 * 100 };
3
enum { OUTPUT_SIZE = 64 * FENC_STRIDE };
4
enum { OUTPUT_SIZE_33 = 33 * OUTPUT_SIZE };
5
+ enum { TEST_CASES = 3 };
6
+ enum { INCR = 32 };
7
+ enum { STRIDE = 64 };
8
+ enum { ITERS = 100 };
9
+ enum { MAX_HEIGHT = 64 };
10
+ enum { PAD_ROWS = 64 };
11
+ enum { BUFFSIZE = STRIDE * (MAX_HEIGHT + PAD_ROWS) + INCR * ITERS };
12
13
+ pixel pixel_test_buff[TEST_CASES][BUFFSIZE];
14
ALIGN_VAR_16(pixel, pixel_buff[INPUT_SIZE]);
15
pixel pixel_out_c[OUTPUT_SIZE];
16
pixel pixel_out_vec[OUTPUT_SIZE];
17
18
bool check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width);
19
bool check_angular_primitive(const intra_pred_t ref[], const intra_pred_t opt[], int size);
20
bool check_allangs_primitive(const intra_allangs_t ref, const intra_allangs_t opt, int size);
21
+ bool check_intra_filter_primitive(const intra_filter_t ref, const intra_filter_t opt);
22
23
public:
24
25
x265_1.7.tar.gz/source/test/ipfilterharness.cpp -> x265_1.8.tar.gz/source/test/ipfilterharness.cpp
Changed
25
1
2
#include "common.h"
3
#include "ipfilterharness.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
IPFilterHarness::IPFilterHarness()
9
{
10
11
coeffIdx);
12
13
if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
14
+ {
15
+ ref(pixel_test_buff[index] + 3 * rand_srcStride,
16
+ rand_srcStride,
17
+ IPF_C_output_s,
18
+ rand_dstStride,
19
+ coeffIdx);
20
return false;
21
+ }
22
23
reportfail();
24
}
25
x265_1.7.tar.gz/source/test/mbdstharness.cpp -> x265_1.8.tar.gz/source/test/mbdstharness.cpp
Changed
36
1
2
#include "common.h"
3
#include "mbdstharness.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
struct DctConf
9
{
10
11
12
MBDstHarness::MBDstHarness()
13
{
14
- const int idct_max = (1 << (BIT_DEPTH + 4)) - 1;
15
+ const int idct_max = (1 << (X265_DEPTH + 4)) - 1;
16
17
/* [0] --- Random values
18
* [1] --- Minimum
19
20
uint32_t optReturnValue = 0;
21
uint32_t refReturnValue = 0;
22
23
- int bits = (rand() % 24) + 8;
24
- int valueToAdd = rand() % (1 << bits);
25
+ int sliceType = rand() % 2;
26
+ int log2TrSize = rand() % 4 + 2;
27
+ int qp = rand() % (QP_MAX_SPEC + QP_BD_OFFSET + 1);
28
+ int per = qp / 6;
29
+ int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
30
+
31
+ int bits = QUANT_SHIFT + per + transformShift;
32
+ int valueToAdd = (sliceType == 1 ? 171 : 85) << (bits - 9);
33
int cmp_size = sizeof(int) * height * width;
34
int cmp_size1 = sizeof(short) * height * width;
35
int numCoeff = height * width;
36
x265_1.7.tar.gz/source/test/pixelharness.cpp -> x265_1.8.tar.gz/source/test/pixelharness.cpp
Changed
201
1
2
3
#include "pixelharness.h"
4
#include "primitives.h"
5
+#include "entropy.h"
6
7
-using namespace x265;
8
+using namespace X265_NS;
9
10
PixelHarness::PixelHarness()
11
{
12
13
return true;
14
}
15
16
-bool PixelHarness::check_pixelcmp_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt)
17
+bool PixelHarness::check_pixel_sse(pixel_sse_t ref, pixel_sse_t opt)
18
{
19
int j = 0;
20
intptr_t stride = STRIDE;
21
22
{
23
int index1 = rand() % TEST_CASES;
24
int index2 = rand() % TEST_CASES;
25
- int vres = (int)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
26
- int cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
27
+ sse_ret_t vres = (sse_ret_t)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
28
+ sse_ret_t cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
29
+ if (vres != cres)
30
+ return false;
31
+
32
+ reportfail();
33
+ j += INCR;
34
+ }
35
+
36
+ return true;
37
+}
38
+
39
+bool PixelHarness::check_pixel_sse_ss(pixel_sse_ss_t ref, pixel_sse_ss_t opt)
40
+{
41
+ int j = 0;
42
+ intptr_t stride = STRIDE;
43
+
44
+ for (int i = 0; i < ITERS; i++)
45
+ {
46
+ int index1 = rand() % TEST_CASES;
47
+ int index2 = rand() % TEST_CASES;
48
+ sse_ret_t vres = (sse_ret_t)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
49
+ sse_ret_t cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
50
if (vres != cres)
51
return false;
52
53
54
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
55
ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
56
57
- memset(ref_dest, 0xCD, sizeof(ref_dest));
58
- memset(opt_dest, 0xCD, sizeof(opt_dest));
59
+ for (int i = 0; i < 64 * 64; i++)
60
+ ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
61
62
int j = 0;
63
64
65
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
66
ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
67
68
- memset(ref_dest, 0xCD, sizeof(ref_dest));
69
- memset(opt_dest, 0xCD, sizeof(opt_dest));
70
+ for (int i = 0; i < 64 * 64; i++)
71
+ ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
72
73
int j = 0;
74
75
76
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
77
ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
78
79
- memset(ref_dest, 0xCD, sizeof(ref_dest));
80
- memset(opt_dest, 0xCD, sizeof(opt_dest));
81
+ for (int i = 0; i < 64 * 64; i++)
82
+ ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
83
84
for (int id = 0; id < 2; id++)
85
{
86
87
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
88
ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
89
90
- memset(ref_dest, 0xCD, sizeof(ref_dest));
91
- memset(opt_dest, 0xCD, sizeof(opt_dest));
92
+ for (int i = 0; i < 64 * 64; i++)
93
+ ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
94
95
int j = 0;
96
97
98
return true;
99
}
100
101
+bool PixelHarness::check_saoCuStatsBO_t(saoCuStatsBO_t ref, saoCuStatsBO_t opt)
102
+{
103
+ enum { NUM_EDGETYPE = 33 }; // classIdx = 1 + (rec[x] >> 3);
104
+ int32_t stats_ref[NUM_EDGETYPE];
105
+ int32_t stats_vec[NUM_EDGETYPE];
106
+
107
+ int32_t count_ref[NUM_EDGETYPE];
108
+ int32_t count_vec[NUM_EDGETYPE];
109
+
110
+ int j = 0;
111
+ for (int i = 0; i < ITERS; i++)
112
+ {
113
+ // initialize input data to random, the dynamic range wrong but good to verify our asm code
114
+ for (int x = 0; x < NUM_EDGETYPE; x++)
115
+ {
116
+ stats_ref[x] = stats_vec[x] = rand();
117
+ count_ref[x] = count_vec[x] = rand();
118
+ }
119
+
120
+ intptr_t stride = 16 * (rand() % 4 + 1);
121
+ int endX = MAX_CU_SIZE - (rand() % 5);
122
+ int endY = MAX_CU_SIZE - (rand() % 4) - 1;
123
+
124
+ ref(pbuf2 + j + 1, pbuf3 + 1, stride, endX, endY, stats_ref, count_ref);
125
+ checked(opt, pbuf2 + j + 1, pbuf3 + 1, stride, endX, endY, stats_vec, count_vec);
126
+
127
+ if (memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref)))
128
+ return false;
129
+
130
+ reportfail();
131
+ j += INCR;
132
+ }
133
+
134
+ return true;
135
+}
136
+
137
+bool PixelHarness::check_saoCuStatsE0_t(saoCuStatsE0_t ref, saoCuStatsE0_t opt)
138
+{
139
+ enum { NUM_EDGETYPE = 5 };
140
+ int32_t stats_ref[NUM_EDGETYPE];
141
+ int32_t stats_vec[NUM_EDGETYPE];
142
+
143
+ int32_t count_ref[NUM_EDGETYPE];
144
+ int32_t count_vec[NUM_EDGETYPE];
145
+
146
+ int j = 0;
147
+ for (int i = 0; i < ITERS; i++)
148
+ {
149
+ // initialize input data to random, the dynamic range wrong but good to verify our asm code
150
+ for (int x = 0; x < NUM_EDGETYPE; x++)
151
+ {
152
+ stats_ref[x] = stats_vec[x] = rand();
153
+ count_ref[x] = count_vec[x] = rand();
154
+ }
155
+
156
+ intptr_t stride = 16 * (rand() % 4 + 1);
157
+ int endX = MAX_CU_SIZE - (rand() % 5) - 1;
158
+ int endY = MAX_CU_SIZE - (rand() % 4) - 1;
159
+
160
+ ref(pbuf2 + j + 1, pbuf3 + j + 1, stride, endX, endY, stats_ref, count_ref);
161
+ checked(opt, pbuf2 + j + 1, pbuf3 + j + 1, stride, endX, endY, stats_vec, count_vec);
162
+
163
+ if (memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref)))
164
+ return false;
165
+
166
+ reportfail();
167
+ j += INCR;
168
+ }
169
+
170
+ return true;
171
+}
172
+
173
+bool PixelHarness::check_saoCuStatsE1_t(saoCuStatsE1_t ref, saoCuStatsE1_t opt)
174
+{
175
+ enum { NUM_EDGETYPE = 5 };
176
+ int32_t stats_ref[NUM_EDGETYPE];
177
+ int32_t stats_vec[NUM_EDGETYPE];
178
+
179
+ int32_t count_ref[NUM_EDGETYPE];
180
+ int32_t count_vec[NUM_EDGETYPE];
181
+
182
+ int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1;
183
+ int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1;
184
+
185
+ int j = 0;
186
+
187
+ for (int i = 0; i < ITERS; i++)
188
+ {
189
+ // initialize input data to random, the dynamic range wrong but good to verify our asm code
190
+ for (int x = 0; x < NUM_EDGETYPE; x++)
191
+ {
192
+ stats_ref[x] = stats_vec[x] = rand();
193
+ count_ref[x] = count_vec[x] = rand();
194
+ }
195
+
196
+ // initial sign
197
+ for (int x = 0; x < MAX_CU_SIZE + 2; x++)
198
+ _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1;
199
+
200
+ intptr_t stride = 16 * (rand() % 4 + 1);
201
x265_1.7.tar.gz/source/test/pixelharness.h -> x265_1.8.tar.gz/source/test/pixelharness.h
Changed
32
1
2
double double_test_buff[TEST_CASES][BUFFSIZE];
3
4
bool check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt);
5
- bool check_pixelcmp_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt);
6
+ bool check_pixel_sse(pixel_sse_t ref, pixel_sse_t opt);
7
+ bool check_pixel_sse_ss(pixel_sse_ss_t ref, pixel_sse_ss_t opt);
8
bool check_pixelcmp_x3(pixelcmp_x3_t ref, pixelcmp_x3_t opt);
9
bool check_pixelcmp_x4(pixelcmp_x4_t ref, pixelcmp_x4_t opt);
10
bool check_copy_pp(copy_pp_t ref, copy_pp_t opt);
11
12
bool check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
13
bool check_saoCuOrgE3_32_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
14
bool check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt);
15
+ bool check_saoCuStatsBO_t(saoCuStatsBO_t ref, saoCuStatsBO_t opt);
16
+ bool check_saoCuStatsE0_t(saoCuStatsE0_t ref, saoCuStatsE0_t opt);
17
+ bool check_saoCuStatsE1_t(saoCuStatsE1_t ref, saoCuStatsE1_t opt);
18
+ bool check_saoCuStatsE2_t(saoCuStatsE2_t ref, saoCuStatsE2_t opt);
19
+ bool check_saoCuStatsE3_t(saoCuStatsE3_t ref, saoCuStatsE3_t opt);
20
bool check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt);
21
bool check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt);
22
bool check_cutree_propagate_cost(cutree_propagate_cost ref, cutree_propagate_cost opt);
23
24
bool check_calSign(sign_t ref, sign_t opt);
25
bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
26
bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
27
+ bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
28
+ bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);
29
30
public:
31
32
x265_1.7.tar.gz/source/test/regression-tests.txt -> x265_1.8.tar.gz/source/test/regression-tests.txt
Changed
137
1
2
# not auto-detected.
3
4
BasketballDrive_1920x1080_50.y4m,--preset faster --aq-strength 2 --merange 190
5
-BasketballDrive_1920x1080_50.y4m,--preset medium --ctu 16 --max-tu-size 8 --subme 7 --qg-size 32
6
+BasketballDrive_1920x1080_50.y4m,--preset medium --ctu 16 --max-tu-size 8 --subme 7 --qg-size 16 --cu-lossless
7
BasketballDrive_1920x1080_50.y4m,--preset medium --keyint -1 --nr-inter 100 -F4 --no-sao
8
-BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 --aq-strength 3 --qg-size 16
9
+BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 --aq-strength 3 --qg-size 16 --limit-refs 1
10
BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 --subme 0
11
BasketballDrive_1920x1080_50.y4m,--preset superfast --psy-rd 1 --ctu 16 --no-wpp
12
BasketballDrive_1920x1080_50.y4m,--preset ultrafast --signhide --colormatrix bt709
13
BasketballDrive_1920x1080_50.y4m,--preset veryfast --tune zerolatency --no-temporal-mvp
14
-BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless --pmode
15
+BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless --pmode --limit-refs 1
16
Coastguard-4k.y4m,--preset medium --rdoq-level 1 --tune ssim --no-signhide --me umh
17
-Coastguard-4k.y4m,--preset slow --tune psnr --cbqpoffs -1 --crqpoffs 1
18
+Coastguard-4k.y4m,--preset slow --tune psnr --cbqpoffs -1 --crqpoffs 1 --limit-refs 1
19
Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop
20
CrowdRun_1920x1080_50_10bit_422.yuv,--preset fast --aq-mode 0 --sar 2 --range full
21
CrowdRun_1920x1080_50_10bit_422.yuv,--preset faster --max-tu-size 4 --min-cu-size 32
22
-CrowdRun_1920x1080_50_10bit_422.yuv,--preset medium --no-wpp --no-cutree --no-strong-intra-smoothing
23
+CrowdRun_1920x1080_50_10bit_422.yuv,--preset medium --no-wpp --no-cutree --no-strong-intra-smoothing --limit-refs 1
24
CrowdRun_1920x1080_50_10bit_422.yuv,--preset slow --no-wpp --tune ssim --transfer smpte240m
25
-CrowdRun_1920x1080_50_10bit_422.yuv,--preset slower --tune ssim --tune fastdecode
26
+CrowdRun_1920x1080_50_10bit_422.yuv,--preset slower --tune ssim --tune fastdecode --limit-refs 2
27
CrowdRun_1920x1080_50_10bit_422.yuv,--preset superfast --weightp --no-wpp --sao
28
CrowdRun_1920x1080_50_10bit_422.yuv,--preset ultrafast --weightp --tune zerolatency --qg-size 16
29
CrowdRun_1920x1080_50_10bit_422.yuv,--preset veryfast --temporal-layers --tune grain
30
CrowdRun_1920x1080_50_10bit_444.yuv,--preset medium --dither --keyint -1 --rdoq-level 1
31
CrowdRun_1920x1080_50_10bit_444.yuv,--preset superfast --weightp --dither --no-psy-rd
32
CrowdRun_1920x1080_50_10bit_444.yuv,--preset ultrafast --weightp --no-wpp --no-open-gop
33
-CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryfast --temporal-layers --repeat-headers
34
+CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryfast --temporal-layers --repeat-headers --limit-refs 2
35
CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryslow --tskip --tskip-fast --no-scenecut
36
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset medium --tune psnr --bframes 16
37
-DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers --no-psy-rd --qg-size 32
38
+DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers --no-psy-rd --qg-size 32 --limit-refs 0 --cu-lossless
39
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset superfast --weightp --qg-size 16
40
DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset medium --nr-inter 500 -F4 --no-psy-rdoq
41
-DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp --rdoq-level 0
42
+DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp --rdoq-level 0 --limit-refs 3
43
DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset veryfast --weightp --nr-intra 1000 -F4
44
FourPeople_1280x720_60.y4m,--preset medium --qp 38 --no-psy-rd
45
FourPeople_1280x720_60.y4m,--preset superfast --no-wpp --lookahead-slices 2
46
Keiba_832x480_30.y4m,--preset medium --pmode --tune grain
47
-Keiba_832x480_30.y4m,--preset slower --fast-intra --nr-inter 500 -F4
48
+Keiba_832x480_30.y4m,--preset slower --fast-intra --nr-inter 500 -F4 --limit-refs 0
49
Keiba_832x480_30.y4m,--preset superfast --no-fast-intra --nr-intra 1000 -F4
50
Kimono1_1920x1080_24_10bit_444.yuv,--preset medium --min-cu-size 32
51
Kimono1_1920x1080_24_10bit_444.yuv,--preset superfast --weightb
52
KristenAndSara_1280x720_60.y4m,--preset medium --no-cutree --max-tu-size 16
53
-KristenAndSara_1280x720_60.y4m,--preset slower --pmode --max-tu-size 8
54
-KristenAndSara_1280x720_60.y4m,--preset superfast --min-cu-size 16 --qg-size 16
55
+KristenAndSara_1280x720_60.y4m,--preset slower --pmode --max-tu-size 8 --limit-refs 0
56
+KristenAndSara_1280x720_60.y4m,--preset superfast --min-cu-size 16 --qg-size 16 --limit-refs 1
57
KristenAndSara_1280x720_60.y4m,--preset ultrafast --strong-intra-smoothing
58
-NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset medium --tune grain
59
+NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset medium --tune grain --limit-refs 2
60
NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset superfast --tune psnr
61
-News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 32
62
+News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16
63
News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0
64
OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp
65
OldTownCross_1920x1080_50_10bit_422.yuv,--preset slower --tune fastdecode
66
67
RaceHorses_416x240_30.y4m,--preset medium --tskip-fast --tskip
68
RaceHorses_416x240_30.y4m,--preset slower --keyint -1 --rdoq-level 0
69
RaceHorses_416x240_30.y4m,--preset superfast --no-cutree
70
-RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip
71
-RaceHorses_416x240_30_10bit.yuv,--preset fast --lookahead-slices 2 --b-intra
72
+RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 3
73
+RaceHorses_416x240_30_10bit.yuv,--preset fast --lookahead-slices 2 --b-intra --limit-refs 1
74
RaceHorses_416x240_30_10bit.yuv,--preset faster --rdoq-level 0 --dither
75
RaceHorses_416x240_30_10bit.yuv,--preset slow --tune grain
76
-RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr
77
+RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr --limit-refs 1
78
RaceHorses_416x240_30_10bit.yuv,--preset veryfast --weightb
79
-RaceHorses_416x240_30_10bit.yuv,--preset placebo
80
+RaceHorses_416x240_30_10bit.yuv,--preset placebo --limit-refs 1
81
SteamLocomotiveTrain_2560x1600_60_10bit_crop.yuv,--preset medium --dither
82
big_buck_bunny_360p24.y4m,--preset faster --keyint 240 --min-keyint 60 --rc-lookahead 200
83
-big_buck_bunny_360p24.y4m,--preset medium --keyint 60 --min-keyint 48 --weightb
84
+big_buck_bunny_360p24.y4m,--preset medium --keyint 60 --min-keyint 48 --weightb --limit-refs 3
85
big_buck_bunny_360p24.y4m,--preset slow --psy-rdoq 2.0 --rdoq-level 1 --no-b-intra
86
big_buck_bunny_360p24.y4m,--preset superfast --psy-rdoq 2.0
87
big_buck_bunny_360p24.y4m,--preset ultrafast --deblock=2
88
89
city_4cif_60fps.y4m,--preset medium --crf 4 --cu-lossless --sao-non-deblock
90
city_4cif_60fps.y4m,--preset superfast --rdpenalty 1 --tu-intra-depth 2
91
city_4cif_60fps.y4m,--preset slower --scaling-list default
92
-city_4cif_60fps.y4m,--preset veryslow --rdpenalty 2 --sao-non-deblock --no-b-intra
93
+city_4cif_60fps.y4m,--preset veryslow --rdpenalty 2 --sao-non-deblock --no-b-intra --limit-refs 0
94
ducks_take_off_420_720p50.y4m,--preset fast --deblock 6 --bframes 16 --rc-lookahead 40
95
-ducks_take_off_420_720p50.y4m,--preset faster --qp 24 --deblock -6
96
+ducks_take_off_420_720p50.y4m,--preset faster --qp 24 --deblock -6 --limit-refs 2
97
ducks_take_off_420_720p50.y4m,--preset medium --tskip --tskip-fast --constrained-intra
98
ducks_take_off_420_720p50.y4m,--preset slow --scaling-list default --qp 40
99
ducks_take_off_420_720p50.y4m,--preset ultrafast --constrained-intra --rd 1
100
ducks_take_off_420_720p50.y4m,--preset veryslow --constrained-intra --bframes 2
101
ducks_take_off_444_720p50.y4m,--preset medium --qp 38 --no-scenecut
102
-ducks_take_off_444_720p50.y4m,--preset superfast --weightp --rd 0
103
-ducks_take_off_444_720p50.y4m,--preset slower --psy-rd 1 --psy-rdoq 2.0 --rdoq-level 1
104
+ducks_take_off_444_720p50.y4m,--preset superfast --weightp --rd 0 --limit-refs 2
105
+ducks_take_off_444_720p50.y4m,--preset slower --psy-rd 1 --psy-rdoq 2.0 --rdoq-level 1 --limit-refs 1
106
mobile_calendar_422_ntsc.y4m,--preset medium --bitrate 500 -F4
107
mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast
108
mobile_calendar_422_ntsc.y4m,--preset superfast --weightp --rd 0
109
-mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip
110
+mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip --limit-refs 2
111
old_town_cross_444_720p50.y4m,--preset faster --rd 1 --tune zero-latency
112
old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 6
113
old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip --ref 7 --no-b-pyramid
114
115
vtc1nw_422_ntsc.y4m,--preset slower --nr-inter 1000 -F4 --tune fast-decode --qg-size 16
116
vtc1nw_422_ntsc.y4m,--preset superfast --weightp --nr-intra 100 -F4
117
washdc_422_ntsc.y4m,--preset faster --rdoq-level 1 --max-merge 5
118
-washdc_422_ntsc.y4m,--preset medium --no-weightp --max-tu-size 4
119
-washdc_422_ntsc.y4m,--preset slower --psy-rdoq 2.0 --rdoq-level 2 --qg-size 32
120
+washdc_422_ntsc.y4m,--preset medium --no-weightp --max-tu-size 4 --limit-refs 1
121
+washdc_422_ntsc.y4m,--preset slower --psy-rdoq 2.0 --rdoq-level 2 --qg-size 32 --limit-refs 1
122
washdc_422_ntsc.y4m,--preset superfast --psy-rd 1 --tune zerolatency
123
washdc_422_ntsc.y4m,--preset ultrafast --weightp --tu-intra-depth 4
124
washdc_422_ntsc.y4m,--preset veryfast --tu-inter-depth 4
125
-washdc_422_ntsc.y4m,--preset veryslow --crf 4 --cu-lossless
126
+washdc_422_ntsc.y4m,--preset veryslow --crf 4 --cu-lossless --limit-refs 3
127
+BasketballDrive_1920x1080_50.y4m,--preset medium --no-cutree --analysis-mode=save --bitrate 15000,--preset medium --no-cutree --analysis-mode=load --bitrate 13000,--preset medium --no-cutree --analysis-mode=load --bitrate 11000,--preset medium --no-cutree --analysis-mode=load --bitrate 9000,--preset medium --no-cutree --analysis-mode=load --bitrate 7000
128
+NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset slow --no-cutree --analysis-mode=save --bitrate 15000,--preset slow --no-cutree --analysis-mode=load --bitrate 13000,--preset slow --no-cutree --analysis-mode=load --bitrate 11000,--preset slow --no-cutree --analysis-mode=load --bitrate 9000,--preset slow --no-cutree --analysis-mode=load --bitrate 7000
129
+old_town_cross_444_720p50.y4m,--preset veryslow --no-cutree --analysis-mode=save --bitrate 15000 --early-skip,--preset veryslow --no-cutree --analysis-mode=load --bitrate 13000 --early-skip,--preset veryslow --no-cutree --analysis-mode=load --bitrate 11000 --early-skip,--preset veryslow --no-cutree --analysis-mode=load --bitrate 9000 --early-skip,--preset veryslow --no-cutree --analysis-mode=load --bitrate 7000 --early-skip
130
+Johnny_1280x720_60.y4m,--preset medium --no-cutree --analysis-mode=save --bitrate 15000 --tskip-fast,--preset medium --no-cutree --analysis-mode=load --bitrate 13000 --tskip-fast,--preset medium --no-cutree --analysis-mode=load --bitrate 11000 --tskip-fast,--preset medium --no-cutree --analysis-mode=load --bitrate 9000 --tskip-fast,--preset medium --no-cutree --analysis-mode=load --bitrate 7000 --tskip-fast
131
+BasketballDrive_1920x1080_50.y4m,--preset medium --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
132
+FourPeople_1280x720_60.y4m,--preset ultrafast --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
133
+FourPeople_1280x720_60.y4m,--preset veryslow --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
134
135
# interlace test, even though input YUV is not field seperated
136
CrowdRun_1920x1080_50_10bit_422.yuv,--preset fast --interlace bff
137
x265_1.7.tar.gz/source/test/smoke-tests.txt -> x265_1.8.tar.gz/source/test/smoke-tests.txt
Changed
18
1
2
3
big_buck_bunny_360p24.y4m,--preset=superfast --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 400 --hrd --aud --repeat-headers
4
big_buck_bunny_360p24.y4m,--preset=medium --bitrate 1000 -F4 --cu-lossless --scaling-list default
5
-big_buck_bunny_360p24.y4m,--preset=slower --no-weightp --cu-stats --pme --qg-size 16
6
+big_buck_bunny_360p24.y4m,--preset=slower --no-weightp --pme --qg-size 16
7
washdc_422_ntsc.y4m,--preset=faster --no-strong-intra-smoothing --keyint 1 --qg-size 16
8
washdc_422_ntsc.y4m,--preset=medium --qp 40 --nr-inter 400 -F4
9
washdc_422_ntsc.y4m,--preset=veryslow --pmode --tskip --rdoq-level 0
10
old_town_cross_444_720p50.y4m,--preset=ultrafast --weightp --keyint -1
11
old_town_cross_444_720p50.y4m,--preset=fast --keyint 20 --min-cu-size 16
12
old_town_cross_444_720p50.y4m,--preset=slow --sao-non-deblock --pmode --qg-size 32
13
-RaceHorses_416x240_30_10bit.yuv,--preset=veryfast --cu-stats --max-tu-size 8
14
+RaceHorses_416x240_30_10bit.yuv,--preset=veryfast --max-tu-size 8
15
RaceHorses_416x240_30_10bit.yuv,--preset=slower --bitrate 500 -F4 --rdoq-level 1
16
CrowdRun_1920x1080_50_10bit_444.yuv,--preset=ultrafast --constrained-intra --min-keyint 5 --keyint 10
17
CrowdRun_1920x1080_50_10bit_444.yuv,--preset=medium --max-tu-size 16
18
x265_1.7.tar.gz/source/test/testbench.cpp -> x265_1.8.tar.gz/source/test/testbench.cpp
Changed
38
1
2
#include "param.h"
3
#include "cpu.h"
4
5
-using namespace x265;
6
+using namespace X265_NS;
7
8
const char* lumaPartStr[NUM_PU_SIZES] =
9
{
10
11
12
int main(int argc, char *argv[])
13
{
14
- int cpuid = x265::cpu_detect();
15
+ int cpuid = X265_NS::cpu_detect();
16
const char *testname = 0;
17
18
if (!(argc & 1))
19
20
}
21
22
int seed = (int)time(NULL);
23
- const char *bpp[] = { "8bpp", "16bpp" };
24
- printf("Using random seed %X %s\n", seed, bpp[HIGH_BIT_DEPTH]);
25
+ printf("Using random seed %X %dbit\n", seed, X265_DEPTH);
26
srand(seed);
27
28
// To disable classes of tests, simply comment them out in this list
29
30
31
for (int i = 0; test_arch[i].flag; i++)
32
{
33
- if (test_arch[i].flag & cpuid)
34
+ if ((test_arch[i].flag & cpuid) == test_arch[i].flag)
35
{
36
printf("Testing primitives: %s\n", test_arch[i].name);
37
fflush(stdout);
38
x265_1.7.tar.gz/source/test/testharness.h -> x265_1.8.tar.gz/source/test/testharness.h
Changed
73
1
2
#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
3
#endif
4
5
-#if HIGH_BIT_DEPTH
6
-#define BIT_DEPTH 10
7
-#else
8
-#define BIT_DEPTH 8
9
-#endif
10
-#define PIXEL_MAX ((1 << BIT_DEPTH) - 1)
11
+#define PIXEL_MAX ((1 << X265_DEPTH) - 1)
12
#define PIXEL_MIN 0
13
#define SHORT_MAX 32767
14
#define SHORT_MIN -32767
15
#define UNSIGNED_SHORT_MAX 65535
16
17
-using namespace x265;
18
+using namespace X265_NS;
19
20
extern const char* lumaPartStr[NUM_PU_SIZES];
21
extern const char* const* chromaPartStr[X265_CSP_COUNT];
22
23
24
extern "C" {
25
#if X265_ARCH_X86
26
-int x265_stack_pagealign(int (*func)(), int align);
27
+int PFX(stack_pagealign)(int (*func)(), int align);
28
29
/* detect when callee-saved regs aren't saved
30
* needs an explicit asm check because it only sometimes crashes in normal use. */
31
-intptr_t x265_checkasm_call(intptr_t (*func)(), int *ok, ...);
32
-float x265_checkasm_call_float(float (*func)(), int *ok, ...);
33
+intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...);
34
+float PFX(checkasm_call_float)(float (*func)(), int *ok, ...);
35
#else
36
-#define x265_stack_pagealign(func, align) func()
37
+#define PFX(stack_pagealign)(func, align) func()
38
#endif
39
40
#if X86_64
41
42
* overwrite the junk written to the stack so there's no guarantee that it will always
43
* detect all functions that assumes zero-extension.
44
*/
45
-void x265_checkasm_stack_clobber(uint64_t clobber, ...);
46
+void PFX(checkasm_stack_clobber)(uint64_t clobber, ...);
47
#define checked(func, ...) ( \
48
m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
49
- x265_checkasm_stack_clobber(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
50
+ PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
51
m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
52
m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
53
- x265_checkasm_call((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
54
+ PFX(checkasm_call)((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
55
56
#define checked_float(func, ...) ( \
57
m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
58
- x265_checkasm_stack_clobber(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
59
+ PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
60
m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
61
m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
62
- x265_checkasm_call_float((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
63
+ PFX(checkasm_call_float)((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
64
#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
65
#elif ARCH_X86
66
-#define checked(func, ...) x265_checkasm_call((intptr_t(*)())func, &m_ok, __VA_ARGS__);
67
-#define checked_float(func, ...) x265_checkasm_call_float((float(*)())func, &m_ok, __VA_ARGS__);
68
+#define checked(func, ...) PFX(checkasm_call)((intptr_t(*)())func, &m_ok, __VA_ARGS__);
69
+#define checked_float(func, ...) PFX(checkasm_call_float)((float(*)())func, &m_ok, __VA_ARGS__);
70
71
#else // if X86_64
72
#define checked(func, ...) func(__VA_ARGS__)
73
x265_1.8.tar.gz/source/x265-extras.cpp
Added
201
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2015 x265 project
4
+ *
5
+ * Authors: Steve Borho <steve@borho.org>
6
+ * Selvakumar Nithiyaruban <selvakumar@multicorewareinc.com>
7
+ * Divya Manivannan <divya@multicorewareinc.com>
8
+ *
9
+ * This program is free software; you can redistribute it and/or modify
10
+ * it under the terms of the GNU General Public License as published by
11
+ * the Free Software Foundation; either version 2 of the License, or
12
+ * (at your option) any later version.
13
+ *
14
+ * This program is distributed in the hope that it will be useful,
15
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ * GNU General Public License for more details.
18
+ *
19
+ * You should have received a copy of the GNU General Public License
20
+ * along with this program; if not, write to the Free Software
21
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
+ *
23
+ * This program is also available under a commercial proprietary license.
24
+ * For more information, contact us at license @ x265.com.
25
+ *****************************************************************************/
26
+
27
+#include "x265.h"
28
+#include "x265-extras.h"
29
+
30
+#include "common.h"
31
+
32
+using namespace X265_NS;
33
+
34
+static const char* summaryCSVHeader =
35
+ "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
36
+ "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
37
+ "I count, I ave-QP, I kbps, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB), "
38
+ "P count, P ave-QP, P kbps, P-PSNR Y, P-PSNR U, P-PSNR V, P-SSIM (dB), "
39
+ "B count, B ave-QP, B kbps, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB), "
40
+ "Version\n";
41
+
42
+FILE* x265_csvlog_open(const x265_api& api, const x265_param& param, const char* fname, int level)
43
+{
44
+ if (sizeof(x265_stats) != api.sizeof_stats || sizeof(x265_picture) != api.sizeof_picture)
45
+ {
46
+ fprintf(stderr, "extras [error]: structure size skew, unable to create CSV logfile\n");
47
+ return NULL;
48
+ }
49
+
50
+ FILE *csvfp = fopen(fname, "r");
51
+ if (csvfp)
52
+ {
53
+ /* file already exists, re-open for append */
54
+ fclose(csvfp);
55
+ return fopen(fname, "ab");
56
+ }
57
+ else
58
+ {
59
+ /* new CSV file, write header */
60
+ csvfp = fopen(fname, "wb");
61
+ if (csvfp)
62
+ {
63
+ if (level)
64
+ {
65
+ fprintf(csvfp, "Encode Order, Type, POC, QP, Bits, ");
66
+ if (param.rc.rateControlMode == X265_RC_CRF)
67
+ fprintf(csvfp, "RateFactor, ");
68
+ fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, SSIM, SSIM (dB), List 0, List 1");
69
+ /* detailed performance statistics */
70
+ fprintf(csvfp, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms), Stall Time (ms), Avg WPP, Row Blocks");
71
+ if (level >= 2)
72
+ {
73
+ uint32_t size = param.maxCUSize;
74
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
75
+ {
76
+ fprintf(csvfp, ", Intra %dx%d DC, Intra %dx%d Planar, Intra %dx%d Ang", size, size, size, size, size, size);
77
+ size /= 2;
78
+ }
79
+ fprintf(csvfp, ", 4x4");
80
+ size = param.maxCUSize;
81
+ if (param.bEnableRectInter)
82
+ {
83
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
84
+ {
85
+ fprintf(csvfp, ", Inter %dx%d, Inter %dx%d (Rect)", size, size, size, size);
86
+ if (param.bEnableAMP)
87
+ fprintf(csvfp, ", Inter %dx%d (Amp)", size, size);
88
+ size /= 2;
89
+ }
90
+ }
91
+ else
92
+ {
93
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
94
+ {
95
+ fprintf(csvfp, ", Inter %dx%d", size, size);
96
+ size /= 2;
97
+ }
98
+ }
99
+ size = param.maxCUSize;
100
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
101
+ {
102
+ fprintf(csvfp, ", Skip %dx%d", size, size);
103
+ size /= 2;
104
+ }
105
+ size = param.maxCUSize;
106
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
107
+ {
108
+ fprintf(csvfp, ", Merge %dx%d", size, size);
109
+ size /= 2;
110
+ }
111
+ fprintf(csvfp, ", Avg Luma Distortion, Avg Chroma Distortion, Avg psyEnergy, Avg Luma Level, Max Luma Level");
112
+ }
113
+ fprintf(csvfp, "\n");
114
+ }
115
+ else
116
+ fputs(summaryCSVHeader, csvfp);
117
+ }
118
+ return csvfp;
119
+ }
120
+}
121
+
122
+// per frame CSV logging
123
+void x265_csvlog_frame(FILE* csvfp, const x265_param& param, const x265_picture& pic, int level)
124
+{
125
+ if (!csvfp)
126
+ return;
127
+
128
+ const x265_frame_stats* frameStats = &pic.frameData;
129
+ fprintf(csvfp, "%d, %c-SLICE, %4d, %2.2lf, %10d,", frameStats->encoderOrder, frameStats->sliceType, frameStats->poc, frameStats->qp, (int)frameStats->bits);
130
+ if (param.rc.rateControlMode == X265_RC_CRF)
131
+ fprintf(csvfp, "%.3lf,", frameStats->rateFactor);
132
+ if (param.bEnablePsnr)
133
+ fprintf(csvfp, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr);
134
+ else
135
+ fputs(" -, -, -, -,", csvfp);
136
+ if (param.bEnableSsim)
137
+ fprintf(csvfp, " %.6f, %6.3f,", frameStats->ssim, x265_ssim2dB(frameStats->ssim));
138
+ else
139
+ fputs(" -, -,", csvfp);
140
+ if (frameStats->sliceType == 'I')
141
+ fputs(" -, -,", csvfp);
142
+ else
143
+ {
144
+ int i = 0;
145
+ while (frameStats->list0POC[i] != -1)
146
+ fprintf(csvfp, "%d ", frameStats->list0POC[i++]);
147
+ fprintf(csvfp, ",");
148
+ if (frameStats->sliceType != 'P')
149
+ {
150
+ i = 0;
151
+ while (frameStats->list1POC[i] != -1)
152
+ fprintf(csvfp, "%d ", frameStats->list1POC[i++]);
153
+ fprintf(csvfp, ",");
154
+ }
155
+ else
156
+ fputs(" -,", csvfp);
157
+ }
158
+ fprintf(csvfp, " %.1lf, %.1lf, %.1lf, %.1lf, %.1lf, %.1lf,", frameStats->decideWaitTime, frameStats->row0WaitTime, frameStats->wallTime, frameStats->refWaitWallTime, frameStats->totalCTUTime, frameStats->stallTime);
159
+ fprintf(csvfp, " %.3lf, %d", frameStats->avgWPP, frameStats->countRowBlocks);
160
+ if (level >= 2)
161
+ {
162
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
163
+ fprintf(csvfp, ", %5.2lf%%, %5.2lf%%, %5.2lf%%", frameStats->cuStats.percentIntraDistribution[depth][0], frameStats->cuStats.percentIntraDistribution[depth][1], frameStats->cuStats.percentIntraDistribution[depth][2]);
164
+ fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentIntraNxN);
165
+ if (param.bEnableRectInter)
166
+ {
167
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
168
+ {
169
+ fprintf(csvfp, ", %5.2lf%%, %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0], frameStats->cuStats.percentInterDistribution[depth][1]);
170
+ if (param.bEnableAMP)
171
+ fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][2]);
172
+ }
173
+ }
174
+ else
175
+ {
176
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
177
+ fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0]);
178
+ }
179
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
180
+ fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentSkipCu[depth]);
181
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
182
+ fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentMergeCu[depth]);
183
+ fprintf(csvfp, ", %.2lf, %.2lf, %.2lf, %.2lf, %d", frameStats->avgLumaDistortion, frameStats->avgChromaDistortion, frameStats->avgPsyEnergy, frameStats->avgLumaLevel, frameStats->maxLumaLevel);
184
+ }
185
+ fprintf(csvfp, "\n");
186
+ fflush(stderr);
187
+}
188
+
189
+void x265_csvlog_encode(FILE* csvfp, const x265_api& api, const x265_param& param, const x265_stats& stats, int level, int argc, char** argv)
190
+{
191
+ if (!csvfp)
192
+ return;
193
+
194
+ if (level)
195
+ {
196
+ // adding summary to a per-frame csv log file, so it needs a summary header
197
+ fprintf(csvfp, "\nSummary\n");
198
+ fputs(summaryCSVHeader, csvfp);
199
+ }
200
+
201
x265_1.8.tar.gz/source/x265-extras.h
Added
68
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2015 x265 project
4
+ *
5
+ * Authors: Steve Borho <steve@borho.org>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_EXTRAS_H
26
+#define X265_EXTRAS_H 1
27
+
28
+#include "x265.h"
29
+
30
+#include <stdio.h>
31
+#include <stdint.h>
32
+
33
+#ifdef __cplusplus
34
+extern "C" {
35
+#endif
36
+
37
+#if _WIN32
38
+#define LIBAPI __declspec(dllexport)
39
+#else
40
+#define LIBAPI
41
+#endif
42
+
43
+/* Open a CSV log file. On success it returns a file handle which must be passed
44
+ * to x265_csvlog_frame() and/or x265_csvlog_encode(). The file handle must be
45
+ * closed by the caller using fclose(). If level is 0, then no frame logging
46
+ * header is written to the file. This function will return NULL if it is unable
47
+ * to open the file for write or if it detects a structure size skew */
48
+LIBAPI FILE* x265_csvlog_open(const x265_api& api, const x265_param& param, const char* fname, int level);
49
+
50
+/* Log frame statistics to the CSV file handle. level should have been non-zero
51
+ * in the call to x265_csvlog_open() if this function is called. */
52
+LIBAPI void x265_csvlog_frame(FILE* csvfp, const x265_param& param, const x265_picture& pic, int level);
53
+
54
+/* Log final encode statistics to the CSV file handle. 'argc' and 'argv' are
55
+ * intended to be command line arguments passed to the encoder. Encode
56
+ * statistics should be queried from the encoder just prior to closing it. */
57
+LIBAPI void x265_csvlog_encode(FILE* csvfp, const x265_api& api, const x265_param& param, const x265_stats& stats, int level, int argc, char** argv);
58
+
59
+/* In-place downshift from a bit-depth greater than 8 to a bit-depth of 8, using
60
+ * the residual bits to dither each row. */
61
+LIBAPI void x265_dither_image(const x265_api& api, x265_picture&, int picWidth, int picHeight, int16_t *errorBuf, int bitDepth);
62
+
63
+#ifdef __cplusplus
64
+}
65
+#endif
66
+
67
+#endif
68
x265_1.7.tar.gz/source/x265.cpp -> x265_1.8.tar.gz/source/x265.cpp
Changed
201
1
2
#pragma warning(disable: 4127) // conditional expression is constant, yes I know
3
#endif
4
5
+#include "x265.h"
6
+#include "x265-extras.h"
7
+#include "x265cli.h"
8
+
9
+#include "common.h"
10
#include "input/input.h"
11
#include "output/output.h"
12
#include "output/reconplay.h"
13
-#include "filters/filters.h"
14
-#include "common.h"
15
+
16
#include "param.h"
17
#include "cpu.h"
18
-#include "x265.h"
19
-#include "x265cli.h"
20
21
#if HAVE_VLD
22
/* Visual Leak Detector */
23
24
#define SetThreadExecutionState(es)
25
#endif
26
27
-using namespace x265;
28
+using namespace X265_NS;
29
30
/* Ctrl-C handler */
31
static volatile sig_atomic_t b_ctrl_c /* = 0 */;
32
33
ReconFile* recon;
34
OutputFile* output;
35
FILE* qpfile;
36
+ FILE* csvfpt;
37
+ const char* csvfn;
38
const char* reconPlayCmd;
39
const x265_api* api;
40
x265_param* param;
41
bool bProgress;
42
bool bForceY4m;
43
bool bDither;
44
+ int csvLogLevel;
45
uint32_t seek; // number of frames to skip from the beginning
46
uint32_t framesToBeEncoded; // number of frames to encode
47
uint64_t totalbytes;
48
49
recon = NULL;
50
output = NULL;
51
qpfile = NULL;
52
+ csvfpt = NULL;
53
+ csvfn = NULL;
54
reconPlayCmd = NULL;
55
api = NULL;
56
param = NULL;
57
58
startTime = x265_mdate();
59
prevUpdateTime = 0;
60
bDither = false;
61
+ csvLogLevel = 0;
62
}
63
64
void destroy();
65
66
if (qpfile)
67
fclose(qpfile);
68
qpfile = NULL;
69
+ if (csvfpt)
70
+ fclose(csvfpt);
71
+ csvfpt = NULL;
72
if (output)
73
output->release();
74
output = NULL;
75
76
77
bool CLIOptions::parse(int argc, char **argv)
78
{
79
- bool bError = 0;
80
- int help = 0;
81
+ bool bError = false;
82
+ int bShowHelp = false;
83
int inputBitDepth = 8;
84
int outputBitDepth = 0;
85
int reconFileBitDepth = 0;
86
87
tune = optarg;
88
else if (c == 'D')
89
outputBitDepth = atoi(optarg);
90
+ else if (c == 'P')
91
+ profile = optarg;
92
else if (c == '?')
93
- showHelp(param);
94
+ bShowHelp = true;
95
+ }
96
+
97
+ if (!outputBitDepth && profile)
98
+ {
99
+ /* try to derive the output bit depth from the requested profile */
100
+ if (strstr(profile, "10"))
101
+ outputBitDepth = 10;
102
+ else if (strstr(profile, "12"))
103
+ outputBitDepth = 12;
104
+ else
105
+ outputBitDepth = 8;
106
}
107
108
api = x265_api_get(outputBitDepth);
109
110
return true;
111
}
112
113
+ if (bShowHelp)
114
+ {
115
+ printVersion(param, api);
116
+ showHelp(param);
117
+ }
118
+
119
for (optind = 0;; )
120
{
121
int long_options_index = -1;
122
123
switch (c)
124
{
125
case 'h':
126
+ printVersion(param, api);
127
showHelp(param);
128
break;
129
130
case 'V':
131
- printVersion(param);
132
- x265_setup_primitives(param, -1);
133
+ printVersion(param, api);
134
+ x265_report_simd(param);
135
exit(0);
136
137
default:
138
139
if (0) ;
140
OPT2("frame-skip", "seek") this->seek = (uint32_t)x265_atoi(optarg, bError);
141
OPT("frames") this->framesToBeEncoded = (uint32_t)x265_atoi(optarg, bError);
142
+ OPT("csv") this->csvfn = optarg;
143
+ OPT("csv-log-level") this->csvLogLevel = x265_atoi(optarg, bError);
144
OPT("no-progress") this->bProgress = false;
145
OPT("output") outputfn = optarg;
146
OPT("input") inputfn = optarg;
147
148
OPT("dither") this->bDither = true;
149
OPT("recon-depth") reconFileBitDepth = (uint32_t)x265_atoi(optarg, bError);
150
OPT("y4m") this->bForceY4m = true;
151
- OPT("profile") profile = optarg; /* handled last */
152
- OPT("preset") /* handled above */;
153
- OPT("tune") /* handled above */;
154
+ OPT("profile") /* handled above */;
155
+ OPT("preset") /* handled above */;
156
+ OPT("tune") /* handled above */;
157
OPT("output-depth") /* handled above */;
158
OPT("recon-y4m-exec") reconPlayCmd = optarg;
159
OPT("qpfile")
160
161
return true;
162
}
163
164
- if (argc <= 1 || help)
165
+ if (argc <= 1)
166
+ {
167
+ api->param_default(param);
168
+ printVersion(param, api);
169
showHelp(param);
170
+ }
171
172
- if (inputfn == NULL || outputfn == NULL)
173
+ if (!inputfn || !outputfn)
174
{
175
- x265_log(param, X265_LOG_ERROR, "input or output file not specified, try -V for help\n");
176
+ x265_log(param, X265_LOG_ERROR, "input or output file not specified, try --help for help\n");
177
return true;
178
}
179
180
- if (param->internalBitDepth != api->max_bit_depth)
181
+ if (param->internalBitDepth != api->bit_depth)
182
{
183
- x265_log(param, X265_LOG_ERROR, "Only bit depths of %d are supported in this build\n", api->max_bit_depth);
184
+ x265_log(param, X265_LOG_ERROR, "Only bit depths of %d are supported in this build\n", api->bit_depth);
185
return true;
186
}
187
188
189
* 1 - unable to parse command line
190
* 2 - unable to open encoder
191
* 3 - unable to generate stream headers
192
- * 4 - encoder abort */
193
+ * 4 - encoder abort
194
+ * 5 - unable to open csv file */
195
196
int main(int argc, char **argv)
197
{
198
199
/* get the encoder parameters post-initialization */
200
api->encoder_parameters(encoder, param);
201
x265_1.7.tar.gz/source/x265.def.in -> x265_1.8.tar.gz/source/x265.def.in
Changed
6
1
2
x265_encoder_close
3
x265_cleanup
4
x265_api_get_${X265_BUILD}
5
+x265_api_query
6
x265_1.7.tar.gz/source/x265.h -> x265_1.8.tar.gz/source/x265.h
Changed
201
1
2
uint32_t numPartitions;
3
} x265_analysis_data;
4
5
+/* cu statistics */
6
+typedef struct x265_cu_stats
7
+{
8
+ double percentSkipCu[4]; // Percentage of skip cu in all depths
9
+ double percentMergeCu[4]; // Percentage of merge cu in all depths
10
+ double percentIntraDistribution[4][3]; // Percentage of DC, Planar, Angular intra modes in all depths
11
+ double percentInterDistribution[4][3]; // Percentage of 2Nx2N inter, rect and amp in all depths
12
+ double percentIntraNxN; // Percentage of 4x4 cu
13
+
14
+ /* All the above values will add up to 100%. */
15
+} x265_cu_stats;
16
+
17
+/* Frame level statistics */
18
+typedef struct x265_frame_stats
19
+{
20
+ double qp;
21
+ double rateFactor;
22
+ double psnrY;
23
+ double psnrU;
24
+ double psnrV;
25
+ double psnr;
26
+ double ssim;
27
+ double decideWaitTime;
28
+ double row0WaitTime;
29
+ double wallTime;
30
+ double refWaitWallTime;
31
+ double totalCTUTime;
32
+ double stallTime;
33
+ double avgWPP;
34
+ double avgLumaDistortion;
35
+ double avgChromaDistortion;
36
+ double avgPsyEnergy;
37
+ double avgLumaLevel;
38
+ uint64_t bits;
39
+ int encoderOrder;
40
+ int poc;
41
+ int countRowBlocks;
42
+ int list0POC[16];
43
+ int list1POC[16];
44
+ uint16_t maxLumaLevel;
45
+ char sliceType;
46
+ x265_cu_stats cuStats;
47
+} x265_frame_stats;
48
+
49
/* Used to pass pictures into the encoder, and to get picture data back out of
50
* the encoder. The input and output semantics are different */
51
typedef struct x265_picture
52
53
* this data structure */
54
x265_analysis_data analysisData;
55
56
+ /* Frame level statistics */
57
+ x265_frame_stats frameData;
58
+
59
} x265_picture;
60
61
typedef enum
62
63
#define X265_LOG_ERROR 0
64
#define X265_LOG_WARNING 1
65
#define X265_LOG_INFO 2
66
-#define X265_LOG_FRAME 3
67
-#define X265_LOG_DEBUG 4
68
-#define X265_LOG_FULL 5
69
+#define X265_LOG_DEBUG 3
70
+#define X265_LOG_FULL 4
71
72
#define X265_B_ADAPT_NONE 0
73
#define X265_B_ADAPT_FAST 1
74
75
#define X265_AQ_NONE 0
76
#define X265_AQ_VARIANCE 1
77
#define X265_AQ_AUTO_VARIANCE 2
78
+#define X265_AQ_AUTO_VARIANCE_BIASED 3
79
80
/* NOTE! For this release only X265_CSP_I420 and X265_CSP_I444 are supported */
81
82
83
X265_RC_CRF
84
} X265_RC_METHODS;
85
86
+/* slice type statistics */
87
+typedef struct x265_sliceType_stats
88
+{
89
+ double avgQp;
90
+ double bitrate;
91
+ double psnrY;
92
+ double psnrU;
93
+ double psnrV;
94
+ double ssim;
95
+ uint32_t numPics;
96
+} x265_sliceType_stats;
97
+
98
/* Output statistics from encoder */
99
typedef struct x265_stats
100
{
101
- double globalPsnrY;
102
- double globalPsnrU;
103
- double globalPsnrV;
104
- double globalPsnr;
105
- double globalSsim;
106
- double elapsedEncodeTime; /* wall time since encoder was opened */
107
- double elapsedVideoTime; /* encoded picture count / frame rate */
108
- double bitrate; /* accBits / elapsed video time */
109
- uint64_t accBits; /* total bits output thus far */
110
- uint32_t encodedPictureCount; /* number of output pictures thus far */
111
- uint32_t totalWPFrames; /* number of uni-directional weighted frames used */
112
+ double globalPsnrY;
113
+ double globalPsnrU;
114
+ double globalPsnrV;
115
+ double globalPsnr;
116
+ double globalSsim;
117
+ double elapsedEncodeTime; /* wall time since encoder was opened */
118
+ double elapsedVideoTime; /* encoded picture count / frame rate */
119
+ double bitrate; /* accBits / elapsed video time */
120
+ uint64_t accBits; /* total bits output thus far */
121
+ uint32_t encodedPictureCount; /* number of output pictures thus far */
122
+ uint32_t totalWPFrames; /* number of uni-directional weighted frames used */
123
+ x265_sliceType_stats statsI; /* statistics of I slice */
124
+ x265_sliceType_stats statsP; /* statistics of P slice */
125
+ x265_sliceType_stats statsB; /* statistics of B slice */
126
} x265_stats;
127
128
/* String values accepted by x265_param_parse() (and CLI) for various parameters */
129
130
static const char * const x265_colorprim_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "film", "bt2020", 0 };
131
static const char * const x265_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100",
132
"log316", "iec61966-2-4", "bt1361e", "iec61966-2-1", "bt2020-10", "bt2020-12",
133
- "smpte-st-2084", "smpte-st-428", 0 };
134
+ "smpte-st-2084", "smpte-st-428", "arib-std-b67", 0 };
135
static const char * const x265_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m",
136
"YCgCo", "bt2020nc", "bt2020c", 0 };
137
static const char * const x265_sar_names[] = { "undef", "1:1", "12:11", "10:11", "16:11", "40:33", "24:11", "20:11",
138
139
140
/*== Logging Features ==*/
141
142
- /* Enable analysis and logging distribution of CUs encoded across various
143
- * modes during mode decision. Default disabled */
144
+ /* Enable analysis and logging distribution of CUs. Now deprecated */
145
int bLogCuStats;
146
147
/* Enable the measurement and reporting of PSNR. Default is enabled */
148
149
* X265_LOG_FULL, default is X265_LOG_INFO */
150
int logLevel;
151
152
- /* filename of CSV log. If logLevel greater than or equal to X265_LOG_FRAME,
153
- * the encoder will emit per-slice statistics to this log file in encode
154
- * order. Otherwise the encoder will emit per-stream statistics into the log
155
- * file when x265_encoder_log is called (presumably at the end of the
156
- * encode) */
157
+ /* Filename of CSV log. Now deprecated */
158
const char* csvfn;
159
160
/*== Internal Picture Specification ==*/
161
162
#define X265_PARAM_BAD_VALUE (-2)
163
int x265_param_parse(x265_param *p, const char *name, const char *value);
164
165
-/* x265_param_apply_profile:
166
- * Applies the restrictions of the given profile. (one of below) */
167
-static const char * const x265_profile_names[] = { "main", "main10", "mainstillpicture", 0 };
168
+static const char * const x265_profile_names[] = {
169
+ /* HEVC v1 */
170
+ "main", "main10", "mainstillpicture", /* alias */ "msp",
171
+
172
+ /* HEVC v2 (Range Extensions) */
173
+ "main-intra", "main10-intra",
174
+ "main444-8", "main444-intra", "main444-stillpicture",
175
176
-/* (can be NULL, in which case the function will do nothing)
177
+ "main422-10", "main422-10-intra",
178
+ "main444-10", "main444-10-intra",
179
+
180
+ "main12", "main12-intra", /* Highly Experimental */
181
+ "main422-12", "main422-12-intra",
182
+ "main444-12", "main444-12-intra",
183
+
184
+ "main444-16-intra", "main444-16-stillpicture", /* Not Supported! */
185
+ 0
186
+};
187
+
188
+/* x265_param_apply_profile:
189
+ * Applies the restrictions of the given profile. (one of x265_profile_names)
190
+ * (can be NULL, in which case the function will do nothing)
191
+ * Note: the detected profile can be lower than the one specified to this
192
+ * function. This function will force the encoder parameters to fit within
193
+ * the specified profile, or fail if that is impossible.
194
* returns 0 on success, negative on failure (e.g. invalid profile name). */
195
int x265_param_apply_profile(x265_param *, const char *profile);
196
197
198
void x265_encoder_get_stats(x265_encoder *encoder, x265_stats *, uint32_t statsSizeBytes);
199
200
/* x265_encoder_log:
201
x265_1.7.tar.gz/source/x265cli.h -> x265_1.8.tar.gz/source/x265cli.h
Changed
147
1
2
#ifndef X265CLI_H
3
#define X265CLI_H 1
4
5
+#include "common.h"
6
+#include "param.h"
7
+
8
#include <getopt.h>
9
10
#ifdef __cplusplus
11
-namespace x265 {
12
+namespace X265_NS {
13
#endif
14
15
static const char short_options[] = "o:D:P:p:f:F:r:I:i:b:s:t:q:m:hwV?";
16
17
{ "allow-non-conformance",no_argument, NULL, 0 },
18
{ "no-allow-non-conformance",no_argument, NULL, 0 },
19
{ "csv", required_argument, NULL, 0 },
20
+ { "csv-log-level", required_argument, NULL, 0 },
21
{ "no-cu-stats", no_argument, NULL, 0 },
22
{ "cu-stats", no_argument, NULL, 0 },
23
{ "y4m", no_argument, NULL, 0 },
24
25
{ "no-b-pyramid", no_argument, NULL, 0 },
26
{ "b-pyramid", no_argument, NULL, 0 },
27
{ "ref", required_argument, NULL, 0 },
28
+ { "limit-refs", required_argument, NULL, 0 },
29
{ "no-weightp", no_argument, NULL, 0 },
30
{ "weightp", no_argument, NULL, 'w' },
31
{ "no-weightb", no_argument, NULL, 0 },
32
33
{ "transfer", required_argument, NULL, 0 },
34
{ "colormatrix", required_argument, NULL, 0 },
35
{ "chromaloc", required_argument, NULL, 0 },
36
- { "crop-rect", required_argument, NULL, 0 },
37
+ { "display-window", required_argument, NULL, 0 },
38
+ { "crop-rect", required_argument, NULL, 0 }, /* DEPRECATED */
39
{ "master-display", required_argument, NULL, 0 },
40
{ "max-cll", required_argument, NULL, 0 },
41
{ "no-dither", no_argument, NULL, 0 },
42
43
{ 0, 0, 0, 0 }
44
};
45
46
-static void printVersion(x265_param *param)
47
+static void printVersion(x265_param *param, const x265_api* api)
48
{
49
- x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", x265_version_str);
50
- x265_log(param, X265_LOG_INFO, "build info %s\n", x265_build_info_str);
51
+ x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str);
52
+ x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str);
53
}
54
55
static void showHelp(x265_param *param)
56
{
57
int level = param->logLevel;
58
- x265_param_default(param);
59
- printVersion(param);
60
61
#define OPT(value) (value ? "enabled" : "disabled")
62
#define H0 printf
63
64
H0("-V/--version Show version info and exit\n");
65
H0("\nOutput Options:\n");
66
H0("-o/--output <filename> Bitstream output file name\n");
67
- H0("-D/--output-depth 8|10 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth);
68
- H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", x265::logLevelNames[param->logLevel + 1]);
69
+ H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth);
70
+ H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]);
71
H0(" --no-progress Disable CLI progress reports\n");
72
- H0(" --[no-]cu-stats Enable logging stats about distribution of cu across all modes. Default %s\n",OPT(param->bLogCuStats));
73
- H1(" --csv <filename> Comma separated log file, log level >= 3 frame log, else one line per run\n");
74
+ H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n");
75
+ H0(" --csv-log-level Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n");
76
H0("\nInput Options:\n");
77
H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n");
78
H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n");
79
80
H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding));
81
H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip));
82
H0("\nTemporal / motion search options:\n");
83
+ H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand);
84
+ H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
85
+ H0(" --limit-refs <0|1|2|3> limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences);
86
H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod);
87
H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine);
88
H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange);
89
- H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand);
90
H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter));
91
H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
92
H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
93
94
H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias);
95
H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive);
96
H0(" --[no-]b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid));
97
- H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
98
- H1(" --zones <zone0>/<zone1>/... Tweak the bitrate of regions of the video\n");
99
- H1(" Each zone is of the form\n");
100
- H1(" <start frame>,<end frame>,<option>\n");
101
- H1(" where <option> is either\n");
102
- H1(" q=<integer> (force QP)\n");
103
- H1(" or b=<float> (bitrate multiplier)\n");
104
H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n");
105
H1(" Format of each line: framenumber frametype QP\n");
106
H1(" QP is optional (none lets x265 choose). Frametypes: I,i,P,B,b.\n");
107
108
H0(" --[no-]strict-cbr Enable stricter conditions and tolerance for bitrate deviations in CBR mode. Default %s\n", OPT(param->rc.bStrictCbr));
109
H0(" --analysis-mode <string|int> save - Dump analysis info into file, load - Load analysis buffers from the file. Default %d\n", param->analysisMode);
110
H0(" --analysis-file <filename> Specify file name used for either dumping or reading analysis data.\n");
111
- H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance. Default %d\n", param->rc.aqMode);
112
+ H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes. Default %d\n", param->rc.aqMode);
113
H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
114
H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16). Default %d\n", param->rc.qgSize);
115
H0(" --[no-]cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree));
116
117
H1(" --cbqpoffs <integer> Chroma Cb QP Offset [-12..12]. Default %d\n", param->cbQpOffset);
118
H1(" --crqpoffs <integer> Chroma Cr QP Offset [-12..12]. Default %d\n", param->crQpOffset);
119
H1(" --scaling-list <string> Specify a file containing HM style quant scaling lists or 'default' or 'off'. Default: off\n");
120
+ H1(" --zones <zone0>/<zone1>/... Tweak the bitrate of regions of the video\n");
121
+ H1(" Each zone is of the form\n");
122
+ H1(" <start frame>,<end frame>,<option>\n");
123
+ H1(" where <option> is either\n");
124
+ H1(" q=<integer> (force QP)\n");
125
+ H1(" or b=<float> (bitrate multiplier)\n");
126
H1(" --lambda-file <string> Specify a file containing replacement values for the lambda tables\n");
127
H1(" MAX_MAX_QP+1 floats for lambda table, then again for lambda2 table\n");
128
H1(" Blank lines and lines starting with hash(#) are ignored\n");
129
130
H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n");
131
H0(" 5=40:33, 6=24:11, 7=20:11, 8=32:11, 9=80:33, 10=18:11, 11=15:11,\n");
132
H0(" 12=64:33, 13=160:99, 14=4:3, 15=3:2, 16=2:1 or custom ratio of <int:int>. Default %d\n", param->vui.aspectRatioIdc);
133
- H1(" --crop-rect <string> Add 'left,top,right,bottom' to the bitstream-level cropping rectangle\n");
134
+ H1(" --display-window <string> Describe overscan cropping region as 'left,top,right,bottom' in pixels\n");
135
H1(" --overscan <string> Specify whether it is appropriate for decoder to show cropped region: undef, show or crop. Default undef\n");
136
H0(" --videoformat <string> Specify video format from undef, component, pal, ntsc, secam, mac. Default undef\n");
137
H0(" --range <string> Specify black level and range of luma and chroma signals as full or limited Default limited\n");
138
139
H0(" smpte240m, film, bt2020. Default undef\n");
140
H0(" --transfer <string> Specify transfer characteristics from undef, bt709, bt470m, bt470bg, smpte170m,\n");
141
H0(" smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1,\n");
142
- H0(" bt2020-10, bt2020-12. Default undef\n");
143
+ H0(" bt2020-10, bt2020-12, smpte-st-2084, smpte-st-428, arib-std-b67. Default undef\n");
144
H1(" --colormatrix <string> Specify color matrix setting from undef, bt709, fcc, bt470bg, smpte170m,\n");
145
H1(" smpte240m, GBR, YCgCo, bt2020nc, bt2020c. Default undef\n");
146
H1(" --chromaloc <integer> Specify chroma sample location (0 to 5). Default of %d\n", param->vui.chromaSampleLocTypeTopField);
147