Packman Build Service PMBS

x265.changes Changed

@@ -1,4 +1,29 @@
 -------------------------------------------------------------------
+Sun Dec  1 10:44:50 UTC 2019 - Luigi Baldoni <aloisio@gmx.com>
+
+- Update to version 3.2.1
+  * Fix output change in analysis load
+  * Fix encoder crash with zones and add test for zones
+  * Fix: Performance drop in aq-mode 4. This patch moves the
+    memory handling part of the edge information required for
+    aq-mode 4 to the Frame class level so that it can be
+    reused by the threads.
+  * Fix: Performance drop in aq-mode 4. This patch moves the
+    memory handling part of the edge information required for
+    aq-mode 4 to the Frame class level so that it can be
+    reused by the threads.
+  * Fix output change observed during analysis-load for
+    inter-refine levels 2 and 3.
+  * Adaptive Frame duplication. This patch does the following: 1.
+    Replaces 2-3 near-identical frames with one frame and sets
+    pic_struct based on frame doubling / tripling. 2. Adds options
+    "--frame-dup" and "--dup-threshold" to enable frame
+    duplication and to set threshold for frame similarity
+    (optional).
+  * Fix: AQ mode 4 commit (21db162) introduces slowdown even when
+    AQ mode 4 is not used.
+
+-------------------------------------------------------------------
 Tue Oct 01 12:21:19 UTC 2019 - enzokiel@kabelmail.de
 
 - Update to version 3.2

​x
 
@@ -1,4 +1,29 @@
 -------------------------------------------------------------------
+Sun Dec  1 10:44:50 UTC 2019 - Luigi Baldoni <aloisio@gmx.com>
+
+- Update to version 3.2.1
+  * Fix output change in analysis load
+  * Fix encoder crash with zones and add test for zones
+  * Fix: Performance drop in aq-mode 4. This patch moves the
+    memory handling part of the edge information required for
+    aq-mode 4 to the Frame class level so that it can be
+    reused by the threads.
+  * Fix: Performance drop in aq-mode 4. This patch moves the
+    memory handling part of the edge information required for
+    aq-mode 4 to the Frame class level so that it can be
+    reused by the threads.
+  * Fix output change observed during analysis-load for
+    inter-refine levels 2 and 3.
+  * Adaptive Frame duplication. This patch does the following: 1.
+    Replaces 2-3 near-identical frames with one frame and sets
+    pic_struct based on frame doubling / tripling. 2. Adds options
+    "--frame-dup" and "--dup-threshold" to enable frame
+    duplication and to set threshold for frame similarity
+    (optional).
+  * Fix: AQ mode 4 commit (21db162) introduces slowdown even when
+    AQ mode 4 is not used.
+
+-------------------------------------------------------------------
 Tue Oct 01 12:21:19 UTC 2019 - enzokiel@kabelmail.de
 
 - Update to version 3.2
​

x265.spec Changed

 
@@ -21,7 +21,7 @@
 %define libname lib%{name}
 %define libsoname %{libname}-%{soname}
 Name:           x265
-Version:        3.2
+Version:        3.2.1
 Release:        0
 Summary:        A free h265/HEVC encoder - encoder binary
 License:        GPL-2.0-or-later
​

x265_3.2.tar.gz/.hg_archival.txt -> x265_3.2.1.tar.gz/.hg_archival.txt Changed

 
@@ -1,4 +1,5 @@
 repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf
-node: 353572437201d551381002aebf20d244bd49ef17
+node: b5c86a64bbbede216b25092def72272ecde5523a
 branch: Release_3.2
-tag: 3.2
+latesttag: 3.2.1
+latesttagdistance: 1
​

x265_3.2.tar.gz/.hgtags -> x265_3.2.1.tar.gz/.hgtags Changed

 
@@ -36,3 +36,5 @@
 113518629fa54ffb491dd479e15c1f00dd39d376 3.1_RC1
 b4e38ce16d7c4b37a6482dc7ae61fd31071b6ff1 3.1_RC2
 20c9994e8bfbeb9443851b2b3a050cd98c8b147b 3.2_RC1
+353572437201d551381002aebf20d244bd49ef17 3.2
+7fa570ead8d361bf6055cd2a881a8e15f12110ae 3.2.1
​

x265_3.2.tar.gz/source/common/frame.cpp -> x265_3.2.1.tar.gz/source/common/frame.cpp Changed

@@ -57,6 +57,9 @@
     m_addOnPrevChange = NULL;
     m_classifyFrame = false;
     m_fieldNum = 0;
+    m_edgePic = NULL;
+    m_gaussianPic = NULL;
+    m_thetaPic = NULL;
 }
 
 bool Frame::create(x265_param *param, float* quantOffsets)
@@ -97,6 +100,20 @@
         CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
     }
 
+    if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount && param->rc.aqMode != 0))
+    {
+        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;
+        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;
+        uint32_t m_lumaMarginX = param->maxCUSize + 32; // search margin and 8-tap filter half-length, padded for 32-byte alignment
+        uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for 8-tap filter and infinite padding
+        intptr_t m_stride = (numCuInWidth * param->maxCUSize) + (m_lumaMarginX << 1);
+        int maxHeight = numCuInHeight * param->maxCUSize;
+
+        m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
+        m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
+        m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
+    }
+
     if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))
     {
         X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
@@ -242,4 +259,11 @@
         X265_FREE_ZERO(m_classifyVariance);
         X265_FREE_ZERO(m_classifyCount);
     }
+
+    if (m_param->rc.aqMode == X265_AQ_EDGE || (m_param->rc.zonefileCount && m_param->rc.aqMode != 0))
+    {
+        X265_FREE(m_edgePic);
+        X265_FREE(m_gaussianPic);
+        X265_FREE(m_thetaPic);
+    }
 }

 
@@ -57,6 +57,9 @@
     m_addOnPrevChange = NULL;
     m_classifyFrame = false;
     m_fieldNum = 0;
+    m_edgePic = NULL;
+    m_gaussianPic = NULL;
+    m_thetaPic = NULL;
 }
 
 bool Frame::create(x265_param *param, float* quantOffsets)
@@ -97,6 +100,20 @@
         CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
     }
 
+    if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount && param->rc.aqMode != 0))
+    {
+        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;
+        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;
+        uint32_t m_lumaMarginX = param->maxCUSize + 32; // search margin and 8-tap filter half-length, padded for 32-byte alignment
+        uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for 8-tap filter and infinite padding
+        intptr_t m_stride = (numCuInWidth * param->maxCUSize) + (m_lumaMarginX << 1);
+        int maxHeight = numCuInHeight * param->maxCUSize;
+
+        m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
+        m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
+        m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
+    }
+
     if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))
     {
         X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
@@ -242,4 +259,11 @@
         X265_FREE_ZERO(m_classifyVariance);
         X265_FREE_ZERO(m_classifyCount);
     }
+
+    if (m_param->rc.aqMode == X265_AQ_EDGE || (m_param->rc.zonefileCount && m_param->rc.aqMode != 0))
+    {
+        X265_FREE(m_edgePic);
+        X265_FREE(m_gaussianPic);
+        X265_FREE(m_thetaPic);
+    }
 }
​

x265_3.2.tar.gz/source/common/frame.h -> x265_3.2.1.tar.gz/source/common/frame.h Changed

 
@@ -131,6 +131,11 @@
     bool                   m_classifyFrame;
     int                    m_fieldNum;
 
+    /* aq-mode 4 : Gaussian, edge and theta frames for edge information */
+    pixel*                 m_edgePic;
+    pixel*                 m_gaussianPic;
+    pixel*                 m_thetaPic;
+
     Frame();
 
     bool create(x265_param *param, float* quantOffsets);
​

x265_3.2.tar.gz/source/encoder/analysis.cpp -> x265_3.2.1.tar.gz/source/encoder/analysis.cpp Changed

@@ -2475,7 +2475,7 @@
                     }
                     if (!mode.cu.m_mergeFlag[pu.puAbsPartIdx])
                     {
-                        if (m_param->mvRefine || m_param->interRefine == 1)
+                        if (m_param->interRefine == 1)
                             m_me.setSourcePU(*mode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, false);
                         //AMVP
                         MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
@@ -2489,7 +2489,7 @@
 
                             int numMvc = mode.cu.getPMV(mode.interNeighbours, list, ref, mode.amvpCand[list][ref], mvc);
                             mvp = mode.amvpCand[list][ref][mode.cu.m_mvpIdx[list][pu.puAbsPartIdx]];
-                            if (m_param->mvRefine || m_param->interRefine == 1)
+                            if (m_param->interRefine == 1)
                             {
                                 MV outmv, mvpSelect[3];
                                 mvpSelect[0] = interDataCTU->mv[list][cuIdx + part].word;

 
@@ -2475,7 +2475,7 @@
                     }
                     if (!mode.cu.m_mergeFlag[pu.puAbsPartIdx])
                     {
-                        if (m_param->mvRefine || m_param->interRefine == 1)
+                        if (m_param->interRefine == 1)
                             m_me.setSourcePU(*mode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, false);
                         //AMVP
                         MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
@@ -2489,7 +2489,7 @@
 
                             int numMvc = mode.cu.getPMV(mode.interNeighbours, list, ref, mode.amvpCand[list][ref], mvc);
                             mvp = mode.amvpCand[list][ref][mode.cu.m_mvpIdx[list][pu.puAbsPartIdx]];
-                            if (m_param->mvRefine || m_param->interRefine == 1)
+                            if (m_param->interRefine == 1)
                             {
                                 MV outmv, mvpSelect[3];
                                 mvpSelect[0] = interDataCTU->mv[list][cuIdx + part].word;
​

x265_3.2.tar.gz/source/encoder/api.cpp -> x265_3.2.1.tar.gz/source/encoder/api.cpp Changed

 
@@ -108,6 +108,7 @@
         int zoneCount = p->rc.zonefileCount ? p->rc.zonefileCount : p->rc.zoneCount;
         param->rc.zones = x265_zone_alloc(zoneCount, !!p->rc.zonefileCount);
         latestParam->rc.zones = x265_zone_alloc(zoneCount, !!p->rc.zonefileCount);
+        zoneParam->rc.zones = x265_zone_alloc(zoneCount, !!p->rc.zonefileCount);
     }
 
     x265_copy_params(param, p);
​

x265_3.2.tar.gz/source/encoder/search.cpp -> x265_3.2.1.tar.gz/source/encoder/search.cpp Changed

@@ -2156,14 +2156,17 @@
 {
     CUData& cu = interMode.cu;
     MV mv, mvmin, mvmax;
-    cu.clipMv(mv);
     int cand = 0, bestcost = INT_MAX;
-    do
+    while (cand < m_param->mvRefine)
     {
-        if (cand && (mvp[cand] == mvp[cand - 1] || (cand == 2 && mvp[cand] == mvp[cand - 2])))
+        if ((cand && mvp[cand] == mvp[cand - 1]) || (cand == 2 && (mvp[cand] == mvp[cand - 2] || mvp[cand] == mvp[cand - 1])))
+        {
+            cand++;
             continue;
+        }
         MV bestMV;
-        mv = mvp[cand];
+        mv = mvp[cand++];
+        cu.clipMv(mv);
         setSearchRange(cu, mv, m_param->searchRange, mvmin, mvmax);
         int cost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mv, numMvc, mvc, m_param->searchRange, bestMV, m_param->maxSlices,
         m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
@@ -2172,7 +2175,7 @@
             bestcost = cost;
             outmv = bestMV;
         }
-    }while (++cand < m_param->mvRefine);
+    }
 }
 /* find the best inter prediction for each PU of specified mode */
 void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
@@ -2246,7 +2249,13 @@
                 const MV* amvp = interMode.amvpCand[list][ref];
                 int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
                 MV mvmin, mvmax, outmv, mvp;
-                mvp = amvp[mvpIdx];
+                if (useAsMVP)
+                {
+                    mvp = interDataCTU->mv[list][cuIdx + puIdx].word;
+                    mvpIdx = interDataCTU->mvpIdx[list][cuIdx + puIdx];
+                }
+                else
+                    mvp = amvp[mvpIdx];
                 if (m_param->searchMethod == X265_SEA)
                 {
                     int puX = puIdx & 1;
@@ -2259,28 +2268,26 @@
                 int satdCost;
                 if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx)
                     mvpIn = bestME[list].mv;
-                if (useAsMVP)
+                if (useAsMVP && m_param->mvRefine > 1)
                 {
                     MV bestmv, mvpSel[3];
                     int mvpIdxSel[3];
                     satdCost = m_me.COST_MAX;
-                    mvpSel[0] = interDataCTU->mv[list][cuIdx + puIdx].word;
-                    mvpIdxSel[0] = interDataCTU->mvpIdx[list][cuIdx + puIdx];
-                    if (m_param->mvRefine > 1)
+                    mvpSel[0] = mvp;
+                    mvpIdxSel[0] = mvpIdx;
+                    mvpIdx = selectMVP(cu, pu, amvp, list, ref);
+                    mvpSel[1] = interMode.amvpCand[list][ref][mvpIdx];
+                    mvpIdxSel[1] = mvpIdx;
+                    if (m_param->mvRefine > 2)
                     {
-                        mvpSel[1] = interMode.amvpCand[list][ref][mvpIdx];
-                        mvpIdxSel[1] = mvpIdx;
-                        if (m_param->mvRefine > 2)
-                        {
-                            mvpSel[2] = interMode.amvpCand[list][ref][!mvpIdx];
-                            mvpIdxSel[2] = !mvpIdx;
-                        }
+                        mvpSel[2] = interMode.amvpCand[list][ref][!mvpIdx];
+                        mvpIdxSel[2] = !mvpIdx;
                     }
                     for (int cand = 0; cand < m_param->mvRefine; cand++)
                     {
                         if (cand && (mvpSel[cand] == mvpSel[cand - 1] || (cand == 2 && mvpSel[cand] == mvpSel[cand - 2])))
                             continue;
-                        setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
+                        setSearchRange(cu, mvpSel[cand], m_param->searchRange, mvmin, mvmax);
                         int bcost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvpSel[cand], numMvc, mvc, m_param->searchRange, bestmv, m_param->maxSlices,
                             m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
                         if (satdCost > bcost)
@@ -2291,6 +2298,7 @@
                             mvpIdx = mvpIdxSel[cand];
                         }
                     }
+                    mvpIn = mvp;
                 }
                 else
                 {

 
@@ -2156,14 +2156,17 @@
 {
     CUData& cu = interMode.cu;
     MV mv, mvmin, mvmax;
-    cu.clipMv(mv);
     int cand = 0, bestcost = INT_MAX;
-    do
+    while (cand < m_param->mvRefine)
     {
-        if (cand && (mvp[cand] == mvp[cand - 1] || (cand == 2 && mvp[cand] == mvp[cand - 2])))
+        if ((cand && mvp[cand] == mvp[cand - 1]) || (cand == 2 && (mvp[cand] == mvp[cand - 2] || mvp[cand] == mvp[cand - 1])))
+        {
+            cand++;
             continue;
+        }
         MV bestMV;
-        mv = mvp[cand];
+        mv = mvp[cand++];
+        cu.clipMv(mv);
         setSearchRange(cu, mv, m_param->searchRange, mvmin, mvmax);
         int cost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mv, numMvc, mvc, m_param->searchRange, bestMV, m_param->maxSlices,
         m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
@@ -2172,7 +2175,7 @@
             bestcost = cost;
             outmv = bestMV;
         }
-    }while (++cand < m_param->mvRefine);
+    }
 }
 /* find the best inter prediction for each PU of specified mode */
 void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
@@ -2246,7 +2249,13 @@
                 const MV* amvp = interMode.amvpCand[list][ref];
                 int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
                 MV mvmin, mvmax, outmv, mvp;
-                mvp = amvp[mvpIdx];
+                if (useAsMVP)
+                {
+                    mvp = interDataCTU->mv[list][cuIdx + puIdx].word;
+                    mvpIdx = interDataCTU->mvpIdx[list][cuIdx + puIdx];
+                }
+                else
+                    mvp = amvp[mvpIdx];
                 if (m_param->searchMethod == X265_SEA)
                 {
                     int puX = puIdx & 1;
@@ -2259,28 +2268,26 @@
                 int satdCost;
                 if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx)
                     mvpIn = bestME[list].mv;
-                if (useAsMVP)
+                if (useAsMVP && m_param->mvRefine > 1)
                 {
                     MV bestmv, mvpSel[3];
                     int mvpIdxSel[3];
                     satdCost = m_me.COST_MAX;
-                    mvpSel[0] = interDataCTU->mv[list][cuIdx + puIdx].word;
-                    mvpIdxSel[0] = interDataCTU->mvpIdx[list][cuIdx + puIdx];
-                    if (m_param->mvRefine > 1)
+                    mvpSel[0] = mvp;
+                    mvpIdxSel[0] = mvpIdx;
+                    mvpIdx = selectMVP(cu, pu, amvp, list, ref);
+                    mvpSel[1] = interMode.amvpCand[list][ref][mvpIdx];
+                    mvpIdxSel[1] = mvpIdx;
+                    if (m_param->mvRefine > 2)
                     {
-                        mvpSel[1] = interMode.amvpCand[list][ref][mvpIdx];
-                        mvpIdxSel[1] = mvpIdx;
-                        if (m_param->mvRefine > 2)
-                        {
-                            mvpSel[2] = interMode.amvpCand[list][ref][!mvpIdx];
-                            mvpIdxSel[2] = !mvpIdx;
-                        }
+                        mvpSel[2] = interMode.amvpCand[list][ref][!mvpIdx];
+                        mvpIdxSel[2] = !mvpIdx;
                     }
                     for (int cand = 0; cand < m_param->mvRefine; cand++)
                     {
                         if (cand && (mvpSel[cand] == mvpSel[cand - 1] || (cand == 2 && mvpSel[cand] == mvpSel[cand - 2])))
                             continue;
-                        setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
+                        setSearchRange(cu, mvpSel[cand], m_param->searchRange, mvmin, mvmax);
                         int bcost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvpSel[cand], numMvc, mvc, m_param->searchRange, bestmv, m_param->maxSlices,
                             m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
                         if (satdCost > bcost)
@@ -2291,6 +2298,7 @@
                             mvpIdx = mvpIdxSel[cand];
                         }
                     }
+                    mvpIn = mvp;
                 }
                 else
                 {
​

x265_3.2.tar.gz/source/encoder/slicetype.cpp -> x265_3.2.1.tar.gz/source/encoder/slicetype.cpp Changed

@@ -85,12 +85,22 @@
 
 } // end anonymous namespace
 
-void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3, intptr_t stride, int height, int width)
+void edgeFilter(Frame *curFrame, x265_param* param)
 {
+    int height = curFrame->m_fencPic->m_picHeight;
+    int width = curFrame->m_fencPic->m_picWidth;
+    intptr_t stride = curFrame->m_fencPic->m_stride;
+    uint32_t numCuInHeight = (height + param->maxCUSize - 1) / param->maxCUSize;
+    int maxHeight = numCuInHeight * param->maxCUSize;
+
+    memset(curFrame->m_edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+    memset(curFrame->m_gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+    memset(curFrame->m_thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+
     pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
-    pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-    pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-    pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *edgeTheta = curFrame->m_thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
 
     for (int i = 0; i < height; i++)
     {
@@ -103,7 +113,7 @@
 
     //Applying Gaussian filter on the picture
     src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
-    refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
     pixel pixelValue = 0;
 
     for (int rowNum = 0; rowNum < height; rowNum++)
@@ -148,7 +158,7 @@
     float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
     float gradientMagnitude = 0;
     pixel blackPixel = 0;
-    edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
     //Applying Sobel filter on the gaussian filtered picture
     for (int rowNum = 0; rowNum < height; rowNum++)
     {
@@ -198,8 +208,10 @@
     angle = sum / (size*size);
 }
 
-uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage, pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)
+uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)
 {
+    pixel *edgeImage = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *edgeTheta = curFrame->m_thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
     intptr_t srcStride = curFrame->m_fencPic->m_stride;
     intptr_t blockOffsetLuma = blockX + (blockY * srcStride);
     int plane = 0; // Sobel filter is applied only on Y component
@@ -478,24 +490,14 @@
             }
             else
             {
-#define AQ_EDGE_BIAS 0.5
-#define EDGE_INCLINATION 45
-                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
-                int maxHeight = numCuInHeight * param->maxCUSize;
-                intptr_t stride = curFrame->m_fencPic->m_stride;
-                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
-                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
-                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
-                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
-                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
-                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
-                if (param->rc.aqMode == X265_AQ_EDGE)
-                    edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
-
                 int blockXY = 0, inclinedEdge = 0;
                 double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
                 double bias_strength = 0.f;
                 double strength = 0.f;
+
+                if (param->rc.aqMode == X265_AQ_EDGE)
+                    edgeFilter(curFrame, param);
+
                 if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
                 {
                     double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
@@ -507,9 +509,7 @@
                             energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                             if (param->rc.aqMode == X265_AQ_EDGE)
                             {
-                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-                                edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+                                edgeDensity = edgeDensityCu(curFrame, avgAngle, blockX, blockY, param->rc.qgSize);
                                 if (edgeDensity)
                                 {
                                     qp_adj = pow(edgeDensity * bit_depth_correction + 1, 0.1);
@@ -542,9 +542,6 @@
                 else
                     strength = param->rc.aqStrength * 1.0397f;
 
-                X265_FREE(edgePic);
-                X265_FREE(gaussianPic);
-                X265_FREE(thetaPic);
                 blockXY = 0;
                 for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                 {

 
@@ -85,12 +85,22 @@
 
 } // end anonymous namespace
 
-void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3, intptr_t stride, int height, int width)
+void edgeFilter(Frame *curFrame, x265_param* param)
 {
+    int height = curFrame->m_fencPic->m_picHeight;
+    int width = curFrame->m_fencPic->m_picWidth;
+    intptr_t stride = curFrame->m_fencPic->m_stride;
+    uint32_t numCuInHeight = (height + param->maxCUSize - 1) / param->maxCUSize;
+    int maxHeight = numCuInHeight * param->maxCUSize;
+
+    memset(curFrame->m_edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+    memset(curFrame->m_gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+    memset(curFrame->m_thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+
     pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
-    pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-    pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-    pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *edgeTheta = curFrame->m_thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
 
     for (int i = 0; i < height; i++)
     {
@@ -103,7 +113,7 @@
 
     //Applying Gaussian filter on the picture
     src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
-    refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
     pixel pixelValue = 0;
 
     for (int rowNum = 0; rowNum < height; rowNum++)
@@ -148,7 +158,7 @@
     float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
     float gradientMagnitude = 0;
     pixel blackPixel = 0;
-    edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+    edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
     //Applying Sobel filter on the gaussian filtered picture
     for (int rowNum = 0; rowNum < height; rowNum++)
     {
@@ -198,8 +208,10 @@
     angle = sum / (size*size);
 }
 
-uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage, pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)
+uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)
 {
+    pixel *edgeImage = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+    pixel *edgeTheta = curFrame->m_thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
     intptr_t srcStride = curFrame->m_fencPic->m_stride;
     intptr_t blockOffsetLuma = blockX + (blockY * srcStride);
     int plane = 0; // Sobel filter is applied only on Y component
@@ -478,24 +490,14 @@
             }
             else
             {
-#define AQ_EDGE_BIAS 0.5
-#define EDGE_INCLINATION 45
-                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
-                int maxHeight = numCuInHeight * param->maxCUSize;
-                intptr_t stride = curFrame->m_fencPic->m_stride;
-                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
-                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
-                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
-                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
-                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
-                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
-                if (param->rc.aqMode == X265_AQ_EDGE)
-                    edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
-
                 int blockXY = 0, inclinedEdge = 0;
                 double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
                 double bias_strength = 0.f;
                 double strength = 0.f;
+
+                if (param->rc.aqMode == X265_AQ_EDGE)
+                    edgeFilter(curFrame, param);
+
                 if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
                 {
                     double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
@@ -507,9 +509,7 @@
                             energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                             if (param->rc.aqMode == X265_AQ_EDGE)
                             {
-                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
-                                edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+                                edgeDensity = edgeDensityCu(curFrame, avgAngle, blockX, blockY, param->rc.qgSize);
                                 if (edgeDensity)
                                 {
                                     qp_adj = pow(edgeDensity * bit_depth_correction + 1, 0.1);
@@ -542,9 +542,6 @@
                 else
                     strength = param->rc.aqStrength * 1.0397f;
 
-                X265_FREE(edgePic);
-                X265_FREE(gaussianPic);
-                X265_FREE(thetaPic);
                 blockXY = 0;
                 for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                 {
​

x265_3.2.tar.gz/source/encoder/slicetype.h -> x265_3.2.1.tar.gz/source/encoder/slicetype.h Changed

 
@@ -40,6 +40,8 @@
 
 #define LOWRES_COST_MASK  ((1 << 14) - 1)
 #define LOWRES_COST_SHIFT 14
+#define AQ_EDGE_BIAS 0.5
+#define EDGE_INCLINATION 45
 
 /* Thread local data for lookahead tasks */
 struct LookaheadTLD
@@ -92,7 +94,7 @@
 protected:
 
     uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
-    uint32_t edgeDensityCu(Frame*curFrame, pixel *edgeImage, pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
+    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
     uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
     uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
     bool     allocWeightedRef(Lowres& fenc);
​

x265_3.2.tar.gz/source/test/regression-tests.txt -> x265_3.2.1.tar.gz/source/test/regression-tests.txt Changed

 
@@ -154,8 +154,9 @@
 BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
 big_buck_bunny_360p24.y4m, --bitrate 500 --fades
 720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
-ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22 --no-cutree
+ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22 --no-cutree
 ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao --crf 20
+Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones 0,139,q=32
 
 # Main12 intraCost overflow bug test
 720p50_parkrun_ter.y4m,--preset medium
​

Changes of Revision 35