HEVC Test Model (HM)  HM-16.3
TEncSearch.cpp
Go to the documentation of this file.
1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2015, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  * this list of conditions and the following disclaimer in the documentation
16  * and/or other materials provided with the distribution.
17  * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  * be used to endorse or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
38 #include "TLibCommon/TypeDef.h"
39 #include "TLibCommon/TComRom.h"
41 #include "TEncSearch.h"
42 #include "TLibCommon/TComTU.h"
43 #include "TLibCommon/Debug.h"
44 #include <math.h>
45 #include <limits>
46 
47 
50 
51 static const TComMv s_acMvRefineH[9] =
52 {
53  TComMv( 0, 0 ), // 0
54  TComMv( 0, -1 ), // 1
55  TComMv( 0, 1 ), // 2
56  TComMv( -1, 0 ), // 3
57  TComMv( 1, 0 ), // 4
58  TComMv( -1, -1 ), // 5
59  TComMv( 1, -1 ), // 6
60  TComMv( -1, 1 ), // 7
61  TComMv( 1, 1 ) // 8
62 };
63 
64 static const TComMv s_acMvRefineQ[9] =
65 {
66  TComMv( 0, 0 ), // 0
67  TComMv( 0, -1 ), // 1
68  TComMv( 0, 1 ), // 2
69  TComMv( -1, -1 ), // 5
70  TComMv( 1, -1 ), // 6
71  TComMv( -1, 0 ), // 3
72  TComMv( 1, 0 ), // 4
73  TComMv( -1, 1 ), // 7
74  TComMv( 1, 1 ) // 8
75 };
76 
77 static const UInt s_auiDFilter[9] =
78 {
79  0, 1, 0,
80  2, 3, 2,
81  0, 1, 0
82 };
83 
84 static Void offsetSubTUCBFs(TComTU &rTu, const ComponentID compID)
85 {
86  TComDataCU *pcCU = rTu.getCU();
87  const UInt uiTrDepth = rTu.GetTransformDepthRel();
88  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(compID);
89  const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> 1;
90 
91  //move the CBFs down a level and set the parent CBF
92 
93  UChar subTUCBF[2];
94  UChar combinedSubTUCBF = 0;
95 
96  for (UInt subTU = 0; subTU < 2; subTU++)
97  {
98  const UInt subTUAbsPartIdx = uiAbsPartIdx + (subTU * partIdxesPerSubTU);
99 
100  subTUCBF[subTU] = pcCU->getCbf(subTUAbsPartIdx, compID, uiTrDepth);
101  combinedSubTUCBF |= subTUCBF[subTU];
102  }
103 
104  for (UInt subTU = 0; subTU < 2; subTU++)
105  {
106  const UInt subTUAbsPartIdx = uiAbsPartIdx + (subTU * partIdxesPerSubTU);
107  const UChar compositeCBF = (subTUCBF[subTU] << 1) | combinedSubTUCBF;
108 
109  pcCU->setCbfPartRange((compositeCBF << uiTrDepth), compID, subTUAbsPartIdx, partIdxesPerSubTU);
110  }
111 }
112 
113 
// NOTE(review): the definition header (listing line 114) and listing lines 121,
// 125-126, 129, 131, 133-134, 136 and 138 are absent from this extract.
// The visible statements only reset members to NULL: the per-component
// quadtree scratch pointers inside the loop, then m_pcEncCfg and m_pTempPel.
// Presumably this is the TEncSearch constructor - confirm against upstream.
115 {
116  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
117  {
118  m_ppcQTTempCoeff[ch] = NULL;
119  m_pcQTTempCoeff[ch] = NULL;
120 #if ADAPTIVE_QP_SELECTION
122  m_pcQTTempArlCoeff[ch] = NULL;
123 #endif
124  m_puhQTTempCbf[ch] = NULL;
127  m_pcQTTempTUCoeff[ch] = NULL;
128 #if ADAPTIVE_QP_SELECTION
130 #endif
132  }
// m_pcEncCfg is later used as the "buffers were allocated" guard in the
// release code, and m_pTempPel is tested there before being deleted.
135  m_pcEncCfg = NULL;
137  m_pTempPel = NULL;
139 }
140 
141 
142 
143 
// NOTE(review): the definition header (listing line 144) and listing lines 190,
// 193 and 195 are absent from this extract. The visible body only releases
// buffers, so this is presumably the TEncSearch destructor - confirm upstream.
145 {
146  if ( m_pTempPel )
147  {
148  delete [] m_pTempPel;
149  m_pTempPel = NULL;
150  }
151 
// The per-layer buffers are released only when an encoder configuration has
// been stored, because the layer count has to be read back from it.
152  if ( m_pcEncCfg )
153  {
// Same "max log2 size - min log2 size + 1" expression that the allocation
// code uses for its layer count.
154  const UInt uiNumLayersAllocated = m_pcEncCfg->getQuadtreeTULog2MaxSize()-m_pcEncCfg->getQuadtreeTULog2MinSize()+1;
155 
156  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
157  {
// Inner arrays first, then the per-component arrays that held them.
158  for (UInt layer = 0; layer < uiNumLayersAllocated; layer++)
159  {
160  delete[] m_ppcQTTempCoeff[ch][layer];
161 #if ADAPTIVE_QP_SELECTION
162  delete[] m_ppcQTTempArlCoeff[ch][layer];
163 #endif
164  }
165  delete[] m_ppcQTTempCoeff[ch];
166  delete[] m_pcQTTempCoeff[ch];
167  delete[] m_puhQTTempCbf[ch];
168 #if ADAPTIVE_QP_SELECTION
169  delete[] m_ppcQTTempArlCoeff[ch];
170  delete[] m_pcQTTempArlCoeff[ch];
171 #endif
172  }
173 
174  for( UInt layer = 0; layer < uiNumLayersAllocated; layer++ )
175  {
176  m_pcQTTempTComYuv[layer].destroy();
177  }
178  }
179 
// The remaining deletes are executed regardless of m_pcEncCfg.
// NOTE(review): only some of these pointers are visibly set to NULL in the
// constructor fragment; the rest may be initialised on lines missing from this
// extract - verify before relying on delete[] of an unset pointer being safe.
180  delete[] m_puhQTTempTrIdx;
181  delete[] m_pcQTTempTComYuv;
182 
183  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
184  {
185  delete[] m_pSharedPredTransformSkip[ch];
186  delete[] m_pcQTTempTUCoeff[ch];
187 #if ADAPTIVE_QP_SELECTION
188  delete[] m_ppcQTTempTUArlCoeff[ch];
189 #endif
191  delete[] m_puhQTTempTransformSkipFlag[ch];
192  }
194 
196 }
197 
198 
199 
200 
// NOTE(review): the first line of this definition (listing line 201, which
// carries the function name and presumably the pcEncCfg parameter) and listing
// lines 231, 252, 277-278, 280, 288 and 290-291 are absent from this extract.
//
// Visible behaviour: stores the collaborators passed in, fills the adaptive
// search-range table with iSearchRange, precomputes the MVP index cost table,
// calls initTempBuff() for the configured chroma format and allocates the
// quadtree scratch buffers.
202  TComTrQuant* pcTrQuant,
203  Int iSearchRange,
204  Int bipredSearchRange,
205  Int iFastSearch,
206  TEncEntropy* pcEntropyCoder,
207  TComRdCost* pcRdCost,
208  TEncSbac*** pppcRDSbacCoder,
209  TEncSbac* pcRDGoOnSbacCoder
210  )
211 {
212  m_pcEncCfg = pcEncCfg;
213  m_pcTrQuant = pcTrQuant;
214  m_iSearchRange = iSearchRange;
215  m_bipredSearchRange = bipredSearchRange;
216  m_iFastSearch = iFastSearch;
217  m_pcEntropyCoder = pcEntropyCoder;
218  m_pcRdCost = pcRdCost;
219 
220  m_pppcRDSbacCoder = pppcRDSbacCoder;
221  m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder;
222 
// Every (direction, reference index) entry starts at the configured range.
223  for (UInt iDir = 0; iDir < MAX_NUM_REF_LIST_ADAPT_SR; iDir++)
224  {
225  for (UInt iRefIdx = 0; iRefIdx < MAX_IDX_ADAPT_SR; iRefIdx++)
226  {
227  m_aaiAdaptSR[iDir][iRefIdx] = iSearchRange;
228  }
229  }
230 
232 
233  // initialize motion cost
// Entry [iIdx][iNum] is the bit cost of index iIdx among iNum candidates;
// index values that do not exist for that count are set to MAX_INT.
234  for( Int iNum = 0; iNum < AMVP_MAX_NUM_CANDS+1; iNum++)
235  {
236  for( Int iIdx = 0; iIdx < AMVP_MAX_NUM_CANDS; iIdx++)
237  {
238  if (iIdx < iNum)
239  {
240  m_auiMVPIdxCost[iIdx][iNum] = xGetMvpIdxBits(iIdx, iNum);
241  }
242  else
243  {
244  m_auiMVPIdxCost[iIdx][iNum] = MAX_INT;
245  }
246  }
247  }
248 
249  const ChromaFormat cform=pcEncCfg->getChromaFormatIdc();
250  initTempBuff(cform);
251 
253 
// One layer per transform size between the configured min and max log2 sizes.
254  const UInt uiNumLayersToAllocate = pcEncCfg->getQuadtreeTULog2MaxSize()-pcEncCfg->getQuadtreeTULog2MinSize()+1;
// 1 << (2 * g_uiMaxCUDepth) entries for the per-partition arrays.
255  const UInt uiNumPartitions = 1<<(g_uiMaxCUDepth<<1);
256  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
257  {
// Coefficient buffers hold a full max-size CU, shifted down by the component's
// horizontal and vertical scale for the chroma format.
258  const UInt csx=::getComponentScaleX(ComponentID(ch), cform);
259  const UInt csy=::getComponentScaleY(ComponentID(ch), cform);
260  m_ppcQTTempCoeff[ch] = new TCoeff* [uiNumLayersToAllocate];
261  m_pcQTTempCoeff[ch] = new TCoeff [(g_uiMaxCUWidth*g_uiMaxCUHeight)>>(csx+csy) ];
262 #if ADAPTIVE_QP_SELECTION
263  m_ppcQTTempArlCoeff[ch] = new TCoeff*[uiNumLayersToAllocate];
264  m_pcQTTempArlCoeff[ch] = new TCoeff [(g_uiMaxCUWidth*g_uiMaxCUHeight)>>(csx+csy) ];
265 #endif
266  m_puhQTTempCbf[ch] = new UChar [uiNumPartitions];
267 
268  for (UInt layer = 0; layer < uiNumLayersToAllocate; layer++)
269  {
270  m_ppcQTTempCoeff[ch][layer] = new TCoeff[(g_uiMaxCUWidth*g_uiMaxCUHeight)>>(csx+csy)];
271 #if ADAPTIVE_QP_SELECTION
272  m_ppcQTTempArlCoeff[ch][layer] = new TCoeff[(g_uiMaxCUWidth*g_uiMaxCUHeight)>>(csx+csy) ];
273 #endif
274  }
275 
// NOTE(review): the matching release of this array is not visible in the
// release code of this extract (a line is missing there) - confirm upstream.
276  m_phQTTempCrossComponentPredictionAlpha[ch] = new Char [uiNumPartitions];
279 #if ADAPTIVE_QP_SELECTION
281 #endif
282  m_puhQTTempTransformSkipFlag[ch] = new UChar [uiNumPartitions];
283  }
284  m_puhQTTempTrIdx = new UChar [uiNumPartitions];
285  m_pcQTTempTComYuv = new TComYuv[uiNumLayersToAllocate];
// NOTE(review): the loop body (listing line 288) is missing from this extract.
286  for( UInt ui = 0; ui < uiNumLayersToAllocate; ++ui )
287  {
289  }
292 }
293 
// FIRSTSEARCHSTOP mirrors FASTME_SMOOTHER_MV as a plain 0/1 value; it feeds
// bFirstSearchStop in TZ_SEARCH_CONFIGURATION.
#if !FASTME_SMOOTHER_MV
#define FIRSTSEARCHSTOP 0
#else
#define FIRSTSEARCHSTOP 1
#endif
299 
// Tuning constants for the TZ search. Expanding this macro declares the
// constants below as const locals in the enclosing scope, in this order.
// bFirstSearchStop takes its value from FIRSTSEARCHSTOP.
#define TZ_SEARCH_CONFIGURATION                                                                                            \
  const Int  iRaster                  = 5; /* TZ: intended to be passed in from outside */                                \
  const Bool bTestOtherPredictedMV    = 0;                                                                                 \
  const Bool bTestZeroVector          = 1;                                                                                 \
  const Bool bTestZeroVectorStart     = 0;                                                                                 \
  const Bool bTestZeroVectorStop      = 0;                                                                                 \
  const Bool bFirstSearchDiamond      = 1; /* 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch */                    \
  const Bool bFirstSearchStop         = FIRSTSEARCHSTOP;                                                                   \
  const UInt uiFirstSearchRounds      = 3; /* first search stops X rounds after the best match (must be >= 1) */           \
  const Bool bEnableRasterSearch      = 1;                                                                                 \
  const Bool bAlwaysRasterSearch      = 0; /* 1: better, but about a factor of 2 slower */                                 \
  const Bool bRasterRefinementEnable  = 0; /* enable either raster refinement or star refinement */                        \
  const Bool bRasterRefinementDiamond = 0; /* 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch */                    \
  const Bool bStarRefinementEnable    = 1; /* enable either star refinement or raster refinement */                        \
  const Bool bStarRefinementDiamond   = 1; /* 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch */                    \
  const Bool bStarRefinementStop      = 0;                                                                                 \
  const UInt uiStarRefinementRounds   = 2; /* star refinement stops X rounds after the best match (must be >= 1) */
317 
318 
// Tuning constants for the selective search variant. Expanding this macro
// declares the constants below as const locals in the enclosing scope; it
// reads the member m_iSearchRange, so it must be expanded where that member
// is accessible.
#define SEL_SEARCH_CONFIGURATION                                                                                           \
  const Bool bTestOtherPredictedMV  = 1;                                                                                   \
  const Bool bTestZeroVector        = 1;                                                                                   \
  const Bool bEnableRasterSearch    = 1;                                                                                   \
  const Bool bAlwaysRasterSearch    = 0; /* 1: better, but about a factor of 15 slower */                                  \
  const Bool bStarRefinementEnable  = 1; /* enable either star refinement or raster refinement */                          \
  const Bool bStarRefinementDiamond = 1; /* 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch */                      \
  const Bool bStarRefinementStop    = 0;                                                                                   \
  const UInt uiStarRefinementRounds = 2; /* star refinement stops X rounds after the best match (must be >= 1) */          \
  const UInt uiSearchRange          = m_iSearchRange;                                                                      \
  const Int  uiSearchRangeInitial   = m_iSearchRange >> 2; /* a quarter of the full range */                               \
  const Int  uiSearchStep           = 4;                                                                                   \
  const Int  iMVDistThresh          = 8;
332 
333 
334 
// Evaluates one integer search position (iSearchX, iSearchY) and, when its
// cost (distortion plus motion cost from m_pcRdCost->getCost) is strictly
// below rcStruct.uiBestSad, records it in rcStruct as the new best: cost,
// position, uiDistance and ucPointNr are stored and uiBestRound is reset to 0.
//
// NOTE(review): listing lines 346, 353, 358, 361-362, 370, 374, 378, 382, 385
// and 401 are absent from this extract. They include the conditions that open
// the two braced sections below, the bodies of the iRows branches and the
// declaration of uiTempSad, so the comments here describe only what is visible.
335 __inline Void TEncSearch::xTZSearchHelp( TComPattern* pcPatternKey, IntTZSearchStruct& rcStruct, const Int iSearchX, const Int iSearchY, const UChar ucPointNr, const UInt uiDistance )
336 {
337  Distortion uiSad = 0;
338 
339  Pel* piRefSrch;
340 
// Reference sample address for this candidate: base pointer plus the
// candidate's row and column offset.
341  piRefSrch = rcStruct.piRefY + iSearchY * rcStruct.iYStride + iSearchX;
342 
343  //-- jclee for using the SAD function pointer
344  m_pcRdCost->setDistParam( pcPatternKey, piRefSrch, rcStruct.iYStride, m_cDistParam );
345 
347  {
348  // fast encoder decision: use subsampled SAD when rows > 8 for integer ME
349  if ( m_pcEncCfg->getUseFastEnc() )
350  {
351  if ( m_cDistParam.iRows > 8 )
352  {
354  }
355  }
356  }
357 
359 
360  // distortion
// First variant: partial distortions are accumulated step by step and the
// candidate is abandoned as soon as it can no longer beat the current best.
363  {
364  Int isubShift = 0;
365  // motion cost
366  UInt uiBitCost = m_pcRdCost->getCost( iSearchX, iSearchY );
367 
// NOTE(review): the branch bodies are missing; presumably they choose
// m_cDistParam.iSubShift from the block height - confirm upstream.
368  if ( m_cDistParam.iRows > 32 )
369  {
371  }
372  else if ( m_cDistParam.iRows > 16 )
373  {
375  }
376  else if ( m_cDistParam.iRows > 8 )
377  {
379  }
380  else
381  {
383  }
384 
386  if((uiTempSad + uiBitCost) < rcStruct.uiBestSad)
387  {
388  uiSad += uiTempSad >> m_cDistParam.iSubShift;
389  while(m_cDistParam.iSubShift > 0)
390  {
// Move the source and reference pointers down by (stride << isubShift)
// before measuring the next partial distortion.
391  isubShift = m_cDistParam.iSubShift -1;
392  m_cDistParam.pOrg = pcPatternKey->getROIY() + (pcPatternKey->getPatternLStride() << isubShift);
393  m_cDistParam.pCur = piRefSrch + (rcStruct.iYStride << isubShift);
394  uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
395  uiSad += uiTempSad >> m_cDistParam.iSubShift;
// Early exit: the scaled partial sum plus the bit cost already exceeds the best.
396  if(((uiSad << isubShift) + uiBitCost) > rcStruct.uiBestSad)
397  {
398  break;
399  }
400 
402  }
403 
// Only a candidate that ran through all refinement steps may become the best.
404  if(m_cDistParam.iSubShift == 0)
405  {
406  uiSad += uiBitCost;
407  if( uiSad < rcStruct.uiBestSad )
408  {
409  rcStruct.uiBestSad = uiSad;
410  rcStruct.iBestX = iSearchX;
411  rcStruct.iBestY = iSearchY;
412  rcStruct.uiBestDistance = uiDistance;
413  rcStruct.uiBestRound = 0;
414  rcStruct.ucPointNr = ucPointNr;
415  }
416  }
417  }
418  }
419  else
420  {
// Second variant: one distortion call, then the same best-candidate update.
421  uiSad = m_cDistParam.DistFunc( &m_cDistParam );
422 
423  // motion cost
424  uiSad += m_pcRdCost->getCost( iSearchX, iSearchY );
425 
426  if( uiSad < rcStruct.uiBestSad )
427  {
428  rcStruct.uiBestSad = uiSad;
429  rcStruct.iBestX = iSearchX;
430  rcStruct.iBestY = iSearchY;
431  rcStruct.uiBestDistance = uiDistance;
432  rcStruct.uiBestRound = 0;
433  rcStruct.ucPointNr = ucPointNr;
434  }
435  }
436 }
437 
438 
439 
440 
441 __inline Void TEncSearch::xTZ2PointSearch( TComPattern* pcPatternKey, IntTZSearchStruct& rcStruct, TComMv* pcMvSrchRngLT, TComMv* pcMvSrchRngRB )
442 {
443  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
444  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
445  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
446  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
447 
448  // 2 point search, // 1 2 3
449  // check only the 2 untested points // 4 0 5
450  // around the start point // 6 7 8
451  Int iStartX = rcStruct.iBestX;
452  Int iStartY = rcStruct.iBestY;
453  switch( rcStruct.ucPointNr )
454  {
455  case 1:
456  {
457  if ( (iStartX - 1) >= iSrchRngHorLeft )
458  {
459  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY, 0, 2 );
460  }
461  if ( (iStartY - 1) >= iSrchRngVerTop )
462  {
463  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY - 1, 0, 2 );
464  }
465  }
466  break;
467  case 2:
468  {
469  if ( (iStartY - 1) >= iSrchRngVerTop )
470  {
471  if ( (iStartX - 1) >= iSrchRngHorLeft )
472  {
473  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY - 1, 0, 2 );
474  }
475  if ( (iStartX + 1) <= iSrchRngHorRight )
476  {
477  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY - 1, 0, 2 );
478  }
479  }
480  }
481  break;
482  case 3:
483  {
484  if ( (iStartY - 1) >= iSrchRngVerTop )
485  {
486  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY - 1, 0, 2 );
487  }
488  if ( (iStartX + 1) <= iSrchRngHorRight )
489  {
490  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY, 0, 2 );
491  }
492  }
493  break;
494  case 4:
495  {
496  if ( (iStartX - 1) >= iSrchRngHorLeft )
497  {
498  if ( (iStartY + 1) <= iSrchRngVerBottom )
499  {
500  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY + 1, 0, 2 );
501  }
502  if ( (iStartY - 1) >= iSrchRngVerTop )
503  {
504  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY - 1, 0, 2 );
505  }
506  }
507  }
508  break;
509  case 5:
510  {
511  if ( (iStartX + 1) <= iSrchRngHorRight )
512  {
513  if ( (iStartY - 1) >= iSrchRngVerTop )
514  {
515  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY - 1, 0, 2 );
516  }
517  if ( (iStartY + 1) <= iSrchRngVerBottom )
518  {
519  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY + 1, 0, 2 );
520  }
521  }
522  }
523  break;
524  case 6:
525  {
526  if ( (iStartX - 1) >= iSrchRngHorLeft )
527  {
528  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY , 0, 2 );
529  }
530  if ( (iStartY + 1) <= iSrchRngVerBottom )
531  {
532  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY + 1, 0, 2 );
533  }
534  }
535  break;
536  case 7:
537  {
538  if ( (iStartY + 1) <= iSrchRngVerBottom )
539  {
540  if ( (iStartX - 1) >= iSrchRngHorLeft )
541  {
542  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY + 1, 0, 2 );
543  }
544  if ( (iStartX + 1) <= iSrchRngHorRight )
545  {
546  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY + 1, 0, 2 );
547  }
548  }
549  }
550  break;
551  case 8:
552  {
553  if ( (iStartX + 1) <= iSrchRngHorRight )
554  {
555  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY, 0, 2 );
556  }
557  if ( (iStartY + 1) <= iSrchRngVerBottom )
558  {
559  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY + 1, 0, 2 );
560  }
561  }
562  break;
563  default:
564  {
565  assert( false );
566  }
567  break;
568  } // switch( rcStruct.ucPointNr )
569 }
570 
571 
572 
573 
574 __inline Void TEncSearch::xTZ8PointSquareSearch( TComPattern* pcPatternKey, IntTZSearchStruct& rcStruct, TComMv* pcMvSrchRngLT, TComMv* pcMvSrchRngRB, const Int iStartX, const Int iStartY, const Int iDist )
575 {
576  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
577  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
578  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
579  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
580 
581  // 8 point search, // 1 2 3
582  // search around the start point // 4 0 5
583  // with the required distance // 6 7 8
584  assert( iDist != 0 );
585  const Int iTop = iStartY - iDist;
586  const Int iBottom = iStartY + iDist;
587  const Int iLeft = iStartX - iDist;
588  const Int iRight = iStartX + iDist;
589  rcStruct.uiBestRound += 1;
590 
591  if ( iTop >= iSrchRngVerTop ) // check top
592  {
593  if ( iLeft >= iSrchRngHorLeft ) // check top left
594  {
595  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iTop, 1, iDist );
596  }
597  // top middle
598  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
599 
600  if ( iRight <= iSrchRngHorRight ) // check top right
601  {
602  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iTop, 3, iDist );
603  }
604  } // check top
605  if ( iLeft >= iSrchRngHorLeft ) // check middle left
606  {
607  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
608  }
609  if ( iRight <= iSrchRngHorRight ) // check middle right
610  {
611  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
612  }
613  if ( iBottom <= iSrchRngVerBottom ) // check bottom
614  {
615  if ( iLeft >= iSrchRngHorLeft ) // check bottom left
616  {
617  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iBottom, 6, iDist );
618  }
619  // check bottom middle
620  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
621 
622  if ( iRight <= iSrchRngHorRight ) // check bottom right
623  {
624  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iBottom, 8, iDist );
625  }
626  } // check bottom
627 }
628 
629 
630 
631 
632 __inline Void TEncSearch::xTZ8PointDiamondSearch( TComPattern* pcPatternKey, IntTZSearchStruct& rcStruct, TComMv* pcMvSrchRngLT, TComMv* pcMvSrchRngRB, const Int iStartX, const Int iStartY, const Int iDist )
633 {
634  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
635  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
636  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
637  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
638 
639  // 8 point search, // 1 2 3
640  // search around the start point // 4 0 5
641  // with the required distance // 6 7 8
642  assert ( iDist != 0 );
643  const Int iTop = iStartY - iDist;
644  const Int iBottom = iStartY + iDist;
645  const Int iLeft = iStartX - iDist;
646  const Int iRight = iStartX + iDist;
647  rcStruct.uiBestRound += 1;
648 
649  if ( iDist == 1 ) // iDist == 1
650  {
651  if ( iTop >= iSrchRngVerTop ) // check top
652  {
653  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
654  }
655  if ( iLeft >= iSrchRngHorLeft ) // check middle left
656  {
657  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
658  }
659  if ( iRight <= iSrchRngHorRight ) // check middle right
660  {
661  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
662  }
663  if ( iBottom <= iSrchRngVerBottom ) // check bottom
664  {
665  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
666  }
667  }
668  else // if (iDist != 1)
669  {
670  if ( iDist <= 8 )
671  {
672  const Int iTop_2 = iStartY - (iDist>>1);
673  const Int iBottom_2 = iStartY + (iDist>>1);
674  const Int iLeft_2 = iStartX - (iDist>>1);
675  const Int iRight_2 = iStartX + (iDist>>1);
676 
677  if ( iTop >= iSrchRngVerTop && iLeft >= iSrchRngHorLeft &&
678  iRight <= iSrchRngHorRight && iBottom <= iSrchRngVerBottom ) // check border
679  {
680  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
681  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iTop_2, 1, iDist>>1 );
682  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iTop_2, 3, iDist>>1 );
683  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
684  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
685  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iBottom_2, 6, iDist>>1 );
686  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
687  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
688  }
689  else // check border
690  {
691  if ( iTop >= iSrchRngVerTop ) // check top
692  {
693  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
694  }
695  if ( iTop_2 >= iSrchRngVerTop ) // check half top
696  {
697  if ( iLeft_2 >= iSrchRngHorLeft ) // check half left
698  {
699  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
700  }
701  if ( iRight_2 <= iSrchRngHorRight ) // check half right
702  {
703  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
704  }
705  } // check half top
706  if ( iLeft >= iSrchRngHorLeft ) // check left
707  {
708  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
709  }
710  if ( iRight <= iSrchRngHorRight ) // check right
711  {
712  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
713  }
714  if ( iBottom_2 <= iSrchRngVerBottom ) // check half bottom
715  {
716  if ( iLeft_2 >= iSrchRngHorLeft ) // check half left
717  {
718  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
719  }
720  if ( iRight_2 <= iSrchRngHorRight ) // check half right
721  {
722  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
723  }
724  } // check half bottom
725  if ( iBottom <= iSrchRngVerBottom ) // check bottom
726  {
727  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
728  }
729  } // check border
730  }
731  else // iDist > 8
732  {
733  if ( iTop >= iSrchRngVerTop && iLeft >= iSrchRngHorLeft &&
734  iRight <= iSrchRngHorRight && iBottom <= iSrchRngVerBottom ) // check border
735  {
736  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 0, iDist );
737  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 0, iDist );
738  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 0, iDist );
739  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 0, iDist );
740  for ( Int index = 1; index < 4; index++ )
741  {
742  Int iPosYT = iTop + ((iDist>>2) * index);
743  Int iPosYB = iBottom - ((iDist>>2) * index);
744  Int iPosXL = iStartX - ((iDist>>2) * index);
745  Int iPosXR = iStartX + ((iDist>>2) * index);
746  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYT, 0, iDist );
747  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYT, 0, iDist );
748  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYB, 0, iDist );
749  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYB, 0, iDist );
750  }
751  }
752  else // check border
753  {
754  if ( iTop >= iSrchRngVerTop ) // check top
755  {
756  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 0, iDist );
757  }
758  if ( iLeft >= iSrchRngHorLeft ) // check left
759  {
760  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 0, iDist );
761  }
762  if ( iRight <= iSrchRngHorRight ) // check right
763  {
764  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 0, iDist );
765  }
766  if ( iBottom <= iSrchRngVerBottom ) // check bottom
767  {
768  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 0, iDist );
769  }
770  for ( Int index = 1; index < 4; index++ )
771  {
772  Int iPosYT = iTop + ((iDist>>2) * index);
773  Int iPosYB = iBottom - ((iDist>>2) * index);
774  Int iPosXL = iStartX - ((iDist>>2) * index);
775  Int iPosXR = iStartX + ((iDist>>2) * index);
776 
777  if ( iPosYT >= iSrchRngVerTop ) // check top
778  {
779  if ( iPosXL >= iSrchRngHorLeft ) // check left
780  {
781  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYT, 0, iDist );
782  }
783  if ( iPosXR <= iSrchRngHorRight ) // check right
784  {
785  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYT, 0, iDist );
786  }
787  } // check top
788  if ( iPosYB <= iSrchRngVerBottom ) // check bottom
789  {
790  if ( iPosXL >= iSrchRngHorLeft ) // check left
791  {
792  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYB, 0, iDist );
793  }
794  if ( iPosXR <= iSrchRngHorRight ) // check right
795  {
796  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYB, 0, iDist );
797  }
798  } // check bottom
799  } // for ...
800  } // check border
801  } // iDist <= 8
802  } // iDist == 1
803 }
804 
805 
806 
807 
808 
809 //<--
810 
// NOTE(review): the first line of this definition (listing line 811, carrying
// the return type, function name and the pcPatternKey parameter) and listing
// lines 847 and 850 are absent from this extract.
//
// Visible behaviour: tries the nine offsets of s_acMvRefineH (iFrac == 2) or
// s_acMvRefineQ (otherwise) around baseRefMv, measures each against a block
// from m_filteredBlock, adds the motion cost, and returns the lowest total.
// On return rcMvFrac holds the winning offset from the table (not the sum
// with its incoming value).
812  TComMv baseRefMv,
813  Int iFrac, TComMv& rcMvFrac,
814  Bool bAllowUseOfHadamard
815  )
816 {
817  Distortion uiDist;
818  Distortion uiDistBest = std::numeric_limits<Distortion>::max();
819  UInt uiDirecBest = 0;
820 
821  Pel* piRefPos;
822  Int iRefStride = m_filteredBlock[0][0].getStride(COMPONENT_Y);
823 
// The Hadamard option is on only if both the encoder configuration and the
// caller allow it.
824  m_pcRdCost->setDistParam( pcPatternKey, m_filteredBlock[0][0].getAddr(COMPONENT_Y), iRefStride, 1, m_cDistParam, m_pcEncCfg->getUseHADME() && bAllowUseOfHadamard );
825 
826  const TComMv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ);
827 
828  for (UInt i = 0; i < 9; i++)
829  {
830  TComMv cMvTest = pcMvRefine[i];
831  cMvTest += baseRefMv;
832 
// The two low bits of the scaled position pick one of the filtered blocks.
833  Int horVal = cMvTest.getHor() * iFrac;
834  Int verVal = cMvTest.getVer() * iFrac;
835  piRefPos = m_filteredBlock[ verVal & 3 ][ horVal & 3 ].getAddr(COMPONENT_Y);
// A scaled value of exactly 2 in one direction (with the other direction even)
// steps the pointer one sample right or one row down respectively.
836  if ( horVal == 2 && ( verVal & 1 ) == 0 )
837  {
838  piRefPos += 1;
839  }
840  if ( ( horVal & 1 ) == 0 && verVal == 2 )
841  {
842  piRefPos += iRefStride;
843  }
// For the motion cost the offset is combined with the incoming rcMvFrac
// instead of baseRefMv.
844  cMvTest = pcMvRefine[i];
845  cMvTest += rcMvFrac;
846 
848 
849  m_cDistParam.pCur = piRefPos;
851  uiDist = m_cDistParam.DistFunc( &m_cDistParam );
852  uiDist += m_pcRdCost->getCost( cMvTest.getHor(), cMvTest.getVer() );
853 
// Strict comparison: on a tie the earlier table entry is kept.
854  if ( uiDist < uiDistBest )
855  {
856  uiDistBest = uiDist;
857  uiDirecBest = i;
858  }
859  }
860 
861  rcMvFrac = pcMvRefine[uiDirecBest];
862 
863  return uiDistBest;
864 }
865 
866 
867 
// NOTE(review): the line carrying the function name and first parameter
// (listing line 869) and listing line 931 are absent from this extract. The
// recursive call below shows the name xEncSubdivCbfQT and the argument list
// (rTu, bLuma, bChroma).
//
// Visible behaviour: walks the transform quadtree of rTu. At each node it
// writes the transform subdivision flag where the flag is not implied (luma
// only), writes the chroma CBFs when bChroma is set, and then recurses into
// the child sections if the node is split.
868 Void
870  Bool bLuma,
871  Bool bChroma )
872 {
873  TComDataCU* pcCU=rTu.getCU();
874  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
875  const UInt uiTrDepth = rTu.GetTransformDepthRel();
876  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
// The node is split when the stored transform index is deeper than this depth.
877  const UInt uiSubdiv = ( uiTrMode > uiTrDepth ? 1 : 0 );
878  const UInt uiLog2LumaTrafoSize = rTu.GetLog2LumaTrSize();
879 
// In the first four cases the split decision is fixed by the configuration, so
// nothing is written and the expected value is only asserted.
880  if( pcCU->isIntra(0) && pcCU->getPartitionSize(0) == SIZE_NxN && uiTrDepth == 0 )
881  {
882  assert( uiSubdiv );
883  }
884  else if( uiLog2LumaTrafoSize > pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() )
885  {
886  assert( uiSubdiv );
887  }
888  else if( uiLog2LumaTrafoSize == pcCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() )
889  {
890  assert( !uiSubdiv );
891  }
892  else if( uiLog2LumaTrafoSize == pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) )
893  {
894  assert( !uiSubdiv );
895  }
896  else
897  {
898  assert( uiLog2LumaTrafoSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );
899  if( bLuma )
900  {
901  m_pcEntropyCoder->encodeTransformSubdivFlag( uiSubdiv, 5 - uiLog2LumaTrafoSize );
902  }
903  }
904 
905  if ( bChroma )
906  {
907  const UInt numberValidComponents = getNumberValidComponents(rTu.GetChromaFormat());
908  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
909  {
910  const ComponentID compID=ComponentID(ch);
// A chroma CBF is written only at the root or below a parent whose CBF is set.
911  if( rTu.ProcessingAllQuadrants(compID) && (uiTrDepth==0 || pcCU->getCbf( uiAbsPartIdx, compID, uiTrDepth-1 ) ))
912  {
913  m_pcEntropyCoder->encodeQtCbf(rTu, compID, (uiSubdiv == 0));
914  }
915  }
916  }
917 
918  if( uiSubdiv )
919  {
920  TComTURecurse tuRecurse(rTu, false);
921  do
922  {
923  xEncSubdivCbfQT( tuRecurse, bLuma, bChroma );
924  } while (tuRecurse.nextSection(rTu));
925  }
926  else
927  {
928  //===== Cbfs =====
// NOTE(review): the statement inside this branch (listing line 931) is missing
// from this extract; presumably it writes the luma CBF - confirm upstream.
929  if( bLuma )
930  {
932  }
933  }
934 }
935 
936 
937 
938 
// NOTE(review): the line carrying the function name and first parameter
// (listing line 940) and listing line 969 are absent from this extract. The
// recursive call below shows the name xEncCoeffQT and the argument list
// (rTu, component, bRealCoeff).
//
// Visible behaviour: descends the transform quadtree of rTu and, at each leaf
// for which rTu.ProcessComponentSection(component) is true, writes that
// component's coefficients through the entropy coder.
939 Void
941  const ComponentID component,
942  Bool bRealCoeff )
943 {
944  TComDataCU* pcCU=rTu.getCU();
945  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
946  const UInt uiTrDepth=rTu.GetTransformDepthRel();
947 
948  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
949  const UInt uiSubdiv = ( uiTrMode > uiTrDepth ? 1 : 0 );
950 
951  if( uiSubdiv )
952  {
953  TComTURecurse tuRecurseChild(rTu, false);
954  do
955  {
956  xEncCoeffQT( tuRecurseChild, component, bRealCoeff );
957  } while (tuRecurseChild.nextSection(rTu) );
958  }
959  else if (rTu.ProcessComponentSection(component))
960  {
961  //===== coefficients =====
962  const UInt uiLog2TrafoSize = rTu.GetLog2LumaTrSize();
963  UInt uiCoeffOffset = rTu.getCoefficientOffset(component);
// Layer index counts down from the largest transform size.
964  UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrafoSize;
// bRealCoeff selects the CU's own coefficients; otherwise the per-layer
// scratch buffer is read.
965  TCoeff* pcCoeff = bRealCoeff ? pcCU->getCoeff(component) : m_ppcQTTempCoeff[component][uiQTLayer];
966 
// NOTE(review): the statement inside this branch (listing line 969) is missing
// from this extract.
967  if (isChroma(component) && (pcCU->getCbf( rTu.GetAbsPartIdxTU(), COMPONENT_Y, uiTrMode ) != 0) && pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction() )
968  {
970  }
971 
972  m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeff+uiCoeffOffset, component );
973  }
974 }
975 
976 
977 
978 
// NOTE(review): the line carrying the function name and first parameter
// (listing line 980) and listing lines 993, 995, 1017 and 1032 are absent from
// this extract. The name is presumably xEncIntraHeader, which is called
// elsewhere in this file with (pcCU, uiTrDepth, uiAbsPartIdx, bLuma, bChroma).
//
// Visible behaviour: for luma, writes the CU-level syntax once (at partition
// index 0) and then the luma intra direction; for chroma, writes the chroma
// intra direction. Returns early, before any prediction mode is written, when
// the CU is flagged as IPCM.
979 Void
981  UInt uiTrDepth,
982  UInt uiAbsPartIdx,
983  Bool bLuma,
984  Bool bChroma )
985 {
986  if( bLuma )
987  {
988  // CU header
989  if( uiAbsPartIdx == 0 )
990  {
// Skip flag and prediction mode are only written in non-intra slices.
991  if( !pcCU->getSlice()->isIntra() )
992  {
994  {
996  }
997  m_pcEntropyCoder->encodeSkipFlag( pcCU, 0, true );
998  m_pcEntropyCoder->encodePredMode( pcCU, 0, true );
999  }
1000  m_pcEntropyCoder ->encodePartSize( pcCU, 0, pcCU->getDepth(0), true );
1001 
1002  if (pcCU->isIntra(0) && pcCU->getPartitionSize(0) == SIZE_2Nx2N )
1003  {
1004  m_pcEntropyCoder->encodeIPCMInfo( pcCU, 0, true );
1005 
1006  if ( pcCU->getIPCMFlag (0))
1007  {
1008  return;
1009  }
1010  }
1011  }
1012  // luma prediction mode
1013  if( pcCU->getPartitionSize(0) == SIZE_2Nx2N )
1014  {
// NOTE(review): the statement inside this branch (listing line 1017) is missing.
1015  if (uiAbsPartIdx==0)
1016  {
1018  }
1019  }
1020  else
1021  {
// Otherwise the mode is written at the first partition of each quarter of the
// CU, and only below the root transform depth.
1022  UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
1023  if (uiTrDepth>0 && (uiAbsPartIdx%uiQNumParts)==0)
1024  {
1025  m_pcEntropyCoder->encodeIntraDirModeLuma ( pcCU, uiAbsPartIdx );
1026  }
1027  }
1028  }
1029 
1030  if( bChroma )
1031  {
// NOTE(review): the condition opening this branch (listing line 1032) is missing.
1033  {
1034  if(uiAbsPartIdx==0)
1035  {
1036  m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiAbsPartIdx );
1037  }
1038  }
1039  else
1040  {
1041  UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
1042  assert(uiTrDepth>0);
1043  if ((uiAbsPartIdx%uiQNumParts)==0)
1044  {
1045  m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiAbsPartIdx );
1046  }
1047  }
1048  }
1049 }
1050 
1051 
1052 
1053 
// NOTE(review): the line carrying the function name and first parameter
// (listing line 1055) and listing lines 1063 and 1076 are absent from this
// extract; uiBits is declared on one of the missing lines.
//
// Visible behaviour: writes the intra header, the subdivision/CBF tree and
// the coefficients of the selected channels for rTu through the entropy
// coder, then returns uiBits.
// NOTE(review): presumably the entropy coder's bit counter is reset before and
// read after these calls on the missing lines - confirm upstream.
1054 UInt
1056  Bool bLuma,
1057  Bool bChroma,
1058  Bool bRealCoeff /* just for test */ )
1059 {
1060  TComDataCU* pcCU=rTu.getCU();
1061  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1062  const UInt uiTrDepth=rTu.GetTransformDepthRel();
1064  xEncIntraHeader ( pcCU, uiTrDepth, uiAbsPartIdx, bLuma, bChroma );
1065  xEncSubdivCbfQT ( rTu, bLuma, bChroma );
1066 
1067  if( bLuma )
1068  {
1069  xEncCoeffQT ( rTu, COMPONENT_Y, bRealCoeff );
1070  }
// Both chroma components are always written together.
1071  if( bChroma )
1072  {
1073  xEncCoeffQT ( rTu, COMPONENT_Cb, bRealCoeff );
1074  xEncCoeffQT ( rTu, COMPONENT_Cr, bRealCoeff );
1075  }
1077 
1078  return uiBits;
1079 }
1080 
// NOTE(review): the first line of this definition (listing line 1081, with the
// return type, name and first parameter) and listing lines 1085 and 1087 are
// absent from this extract; uiBits is declared on one of the missing lines.
//
// Visible behaviour: writes only the coefficients of compID for rTu (no
// header, no subdivision/CBF syntax) and returns uiBits.
1082  ComponentID compID,
1083  Bool bRealCoeff /* just for test */ )
1084 {
1086  xEncCoeffQT ( rTu, compID, bRealCoeff );
1088  return uiBits;
1089 }
1090 
// Codes one component (compID) of the transform unit rTu in intra mode:
// prediction -> residual -> optional cross-component prediction -> transform and
// quantisation -> inverse transform -> reconstruction. The distortion of the
// reconstructed block is added to ruiDist.
//
// NOTE(review): the embedded listing numbers skip 1091 and 1094, so the line with the
// return type / function name / first parameter and one further parameter line are
// not visible here. The body uses pcOrgYuv and resiLuma, which are presumably
// declared on those lines -- confirm against the complete file.
//
// Parameters visible below:
//   pcPredYuv             receives the prediction; the reconstruction is later written
//                         to the same address (piReco is taken from pcPredYuv too).
//   pcResiYuv             receives the residual (original minus prediction).
//   checkCrossCPrediction requests cross-component prediction; it is only used for
//                         chroma components whose prediction mode is DM_CHROMA_IDX.
//   ruiDist               accumulator the block distortion is added to.
//   compID                component to code.
//   rTu                   transform unit being coded.
//   default0Save1Load2    1: predict and copy the prediction into
//                            m_pSharedPredTransformSkip[compID];
//                         2: reuse the prediction stored there instead of predicting
//                            again (only when DEBUG_STRING is not defined);
//                         other values: predict without saving.
1092  TComYuv* pcPredYuv,
1093  TComYuv* pcResiYuv,
1095  const Bool checkCrossCPrediction,
1096  Distortion& ruiDist,
1097  const ComponentID compID,
1098  TComTU& rTu
1099  DEBUG_STRING_FN_DECLARE(sDebug)
1100  ,Int default0Save1Load2
1101  )
1102 {
// nothing is coded when the TU reports no section to process for this component
1103  if (!rTu.ProcessComponentSection(compID))
1104  {
1105  return;
1106  }
1107  const Bool bIsLuma = isLuma(compID);
1108  const TComRectangle &rect= rTu.getRect(compID);
1109  TComDataCU *pcCU=rTu.getCU();
1110  const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1111 
1112  const UInt uiTrDepth=rTu.GetTransformDepthRelAdj(compID);
1113  const UInt uiFullDepth = rTu.GetTransformDepthTotal();
1114  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1115  const ChromaFormat chFmt = pcOrgYuv->getChromaFormat();
1116  const ChannelType chType = toChannelType(compID);
1117 
1118  const UInt uiWidth = rect.width;
1119  const UInt uiHeight = rect.height;
// the stride of the original buffer is also used for the pred / resi / reco pointers below
1120  const UInt uiStride = pcOrgYuv ->getStride (compID);
1121  Pel* piOrg = pcOrgYuv ->getAddr( compID, uiAbsPartIdx );
1122  Pel* piPred = pcPredYuv->getAddr( compID, uiAbsPartIdx );
1123  Pel* piResi = pcResiYuv->getAddr( compID, uiAbsPartIdx );
// same address as piPred: the reconstruction overwrites the prediction samples
1124  Pel* piReco = pcPredYuv->getAddr( compID, uiAbsPartIdx );
// index of the per-size temporary buffers: 0 for the largest TU size
1125  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1126  Pel* piRecQt = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( compID, uiAbsPartIdx );
1127  const UInt uiRecQtStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride(compID);
1128  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
// position of this block inside the picture reconstruction buffer
1129  Pel* piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder );
1130  UInt uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride ( compID );
1131  TCoeff* pcCoeff = m_ppcQTTempCoeff[compID][uiQTLayer] + rTu.getCoefficientOffset(compID);
1132  Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1133 
1134 #if ADAPTIVE_QP_SELECTION
1135  TCoeff* pcArlCoeff = m_ppcQTTempArlCoeff[compID][ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1136 #endif
1137 
// uiChPredMode : mode stored in the CU for this channel type
// uiChCodedMode: for chroma with DM_CHROMA_IDX, the direction of the corresponding luma PU
// uiChFinalMode: for 4:2:2 chroma, the direction remapped through
//                g_chroma422IntraAngleMappingTable; otherwise uiChCodedMode
1138  const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1139  const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && !bIsLuma) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1140  const UInt uiChFinalMode = ((chFmt == CHROMA_422) && !bIsLuma) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1141 
// offset of this TU inside the luma residual buffers, which use a row stride of MAX_CU_SIZE
1142  const Int blkX = g_auiRasterToPelX[ g_auiZscanToRaster[ uiAbsPartIdx ] ];
1143  const Int blkY = g_auiRasterToPelY[ g_auiZscanToRaster[ uiAbsPartIdx ] ];
1144  const Int bufferOffset = blkX + (blkY * MAX_CU_SIZE);
1145  Pel *const encoderLumaResidual = resiLuma[RESIDUAL_ENCODER_SIDE ] + bufferOffset;
1146  Pel *const reconstructedLumaResidual = resiLuma[RESIDUAL_RECONSTRUCTED] + bufferOffset;
1147  const Bool bUseCrossCPrediction = isChroma(compID) && (uiChPredMode == DM_CHROMA_IDX) && checkCrossCPrediction;
1148  const Bool bUseReconstructedResidualForEstimate = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate();
// luma residual the alpha estimate is computed from (configuration dependent)
1149  Pel *const lumaResidualForEstimate = bUseReconstructedResidualForEstimate ? reconstructedLumaResidual : encoderLumaResidual;
1150 
1151 #ifdef DEBUG_STRING
1152  const Int debugPredModeMask=DebugStringGetPredModeMask(MODE_INTRA);
1153 #endif
1154 
1155  //===== init availability pattern =====
1156  Bool bAboveAvail = false;
1157  Bool bLeftAvail = false;
1158 
1159  DEBUG_STRING_NEW(sTemp)
1160 
// with DEBUG_STRING defined the condition is compiled out and the prediction is always
// recomputed; otherwise mode 2 skips to the "load prediction" branch further down
1161 #ifndef DEBUG_STRING
1162  if( default0Save1Load2 != 2 )
1163 #endif
1164  {
1165  const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(compID, uiChFinalMode, uiWidth, uiHeight, chFmt, pcCU->getSlice()->getSPS()->getDisableIntraReferenceSmoothing());
1166 
1167  initAdiPatternChType( rTu, bAboveAvail, bLeftAvail, compID, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sDebug) );
1168 
1169  //===== get prediction signal =====
1170  predIntraAng( compID, uiChFinalMode, piOrg, uiStride, piPred, uiStride, rTu, bAboveAvail, bLeftAvail, bUseFilteredPredictions );
1171 
1172  // save prediction
1173  if( default0Save1Load2 == 1 )
1174  {
// the shared buffer is written densely (row length uiWidth, no stride padding)
1175  Pel* pPred = piPred;
1176  Pel* pPredBuf = m_pSharedPredTransformSkip[compID];
1177  Int k = 0;
1178  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1179  {
1180  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1181  {
1182  pPredBuf[ k ++ ] = pPred[ uiX ];
1183  }
1184  pPred += uiStride;
1185  }
1186  }
1187  }
1188 #ifndef DEBUG_STRING
1189  else
1190  {
1191  // load prediction
1192  Pel* pPred = piPred;
1193  Pel* pPredBuf = m_pSharedPredTransformSkip[compID];
1194  Int k = 0;
1195  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1196  {
1197  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1198  {
1199  pPred[ uiX ] = pPredBuf[ k ++ ];
1200  }
1201  pPred += uiStride;
1202  }
1203  }
1204 #endif
1205 
1206  //===== get residual signal =====
1207  {
1208  // get residual
1209  Pel* pOrg = piOrg;
1210  Pel* pPred = piPred;
1211  Pel* pResi = piResi;
1212 
1213  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1214  {
1215  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1216  {
1217  pResi[ uiX ] = pOrg[ uiX ] - pPred[ uiX ];
1218  }
1219 
1220  pOrg += uiStride;
1221  pResi += uiStride;
1222  pPred += uiStride;
1223  }
1224  }
1225 
// NOTE(review): listing line 1226 is missing; the braces below presumably form the body
// of a condition on that line -- confirm against the complete file.
1227  {
1228  if (bUseCrossCPrediction)
1229  {
// an alpha of 0 ends the function here: no transform is run and ruiDist is not updated
1230  if (xCalcCrossComponentPredictionAlpha( rTu, compID, lumaResidualForEstimate, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride ) == 0)
1231  {
1232  return;
1233  }
// in-place on piResi (source and target are the same buffer); last argument false here,
// true in the matching call after the inverse transform
1234  TComTrQuant::crossComponentPrediction ( rTu, compID, reconstructedLumaResidual, piResi, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride, uiStride, false );
1235  }
1236  else if (isLuma(compID) && !bUseReconstructedResidualForEstimate)
1237  {
// keep the encoder-side luma residual for later chroma alpha estimation
1238  xStoreCrossComponentPredictionResult( encoderLumaResidual, piResi, rTu, 0, 0, MAX_CU_SIZE, uiStride );
1239  }
1240  }
1241 
1242  //===== transform and quantization =====
1243  //--- init rate estimation arrays for RDOQ ---
1244  if( useTransformSkip ? m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ() )
1245  {
1246  m_pcEntropyCoder->estimateBit( m_pcTrQuant->m_pcEstBitsSbac, uiWidth, uiHeight, chType );
1247  }
1248 
1249  //--- transform and quantization ---
// passed to transformNxN below and tested afterwards; presumably the sum of absolute
// quantised levels -- confirm against TComTrQuant
1250  TCoeff uiAbsSum = 0;
1251  if (bIsLuma)
1252  {
1253  pcCU ->setTrIdxSubParts ( uiTrDepth, uiAbsPartIdx, uiFullDepth );
1254  }
1255 
1256  const QpParam cQP(*pcCU, compID);
1257 
1258 #if RDOQ_CHROMA_LAMBDA
1259  m_pcTrQuant->selectLambda (compID);
1260 #endif
1261 
1262  m_pcTrQuant->transformNxN ( rTu, compID, piResi, uiStride, pcCoeff,
// NOTE(review): listing line 1263 is missing; the #endif below has no visible #if.
// pcArlCoeff is declared under ADAPTIVE_QP_SELECTION above, so the missing line is
// presumably the matching "#if ADAPTIVE_QP_SELECTION" -- confirm.
1264  pcArlCoeff,
1265 #endif
1266  uiAbsSum, cQP
1267  );
1268 
1269  //--- inverse transform ---
1270 
1271 #ifdef DEBUG_STRING
1272  if ( (uiAbsSum > 0) || (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask) )
1273 #else
1274  if ( uiAbsSum > 0 )
1275 #endif
1276  {
1277  m_pcTrQuant->invTransformNxN ( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sDebug, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) );
1278  }
1279  else
1280  {
// uiAbsSum is not positive: clear the coefficient block and the residual rows instead of
// running the inverse transform
1281  Pel* pResi = piResi;
1282  memset( pcCoeff, 0, sizeof( TCoeff ) * uiWidth * uiHeight );
1283  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1284  {
1285  memset( pResi, 0, sizeof( Pel ) * uiWidth );
1286  pResi += uiStride;
1287  }
1288  }
1289 
1290 
1291  //===== reconstruction =====
1292  {
1293  Pel* pPred = piPred;
1294  Pel* pResi = piResi;
1295  Pel* pReco = piReco;
1296  Pel* pRecQt = piRecQt;
1297  Pel* pRecIPred = piRecIPred;
1298  const UInt clipbd=g_bitDepth[chType];
1299 
// NOTE(review): listing line 1300 is missing; the braces below presumably form the body
// of a condition on that line -- confirm against the complete file.
1301  {
1302  if (bUseCrossCPrediction)
1303  {
1304  TComTrQuant::crossComponentPrediction( rTu, compID, reconstructedLumaResidual, piResi, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride, uiStride, true );
1305  }
1306  else if (isLuma(compID))
1307  {
// keep the residual obtained after the inverse transform in the reconstructed-residual buffer
1308  xStoreCrossComponentPredictionResult( reconstructedLumaResidual, piResi, rTu, 0, 0, MAX_CU_SIZE, uiStride );
1309  }
1310  }
1311 
// The debug variant and the plain variant below compute the same samples:
// reco = clip(pred + resi) to the channel bit depth, written to piReco (inside
// pcPredYuv), to the QT temporary of this layer and to the picture reconstruction buffer.
1312  #ifdef DEBUG_STRING
1313  std::stringstream ss(stringstream::out);
1314  const Bool bDebugPred=((DebugOptionList::DebugString_Pred.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID));
1315  const Bool bDebugResi=((DebugOptionList::DebugString_Resi.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID));
1316  const Bool bDebugReco=((DebugOptionList::DebugString_Reco.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID));
1317 
1318  if (bDebugPred || bDebugResi || bDebugReco)
1319  {
1320  ss << "###: " << "CompID: " << compID << " pred mode (ch/fin): " << uiChPredMode << "/" << uiChFinalMode << " absPartIdx: " << rTu.GetAbsPartIdxTU() << "\n";
1321  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1322  {
1323  ss << "###: ";
1324  if (bDebugPred)
1325  {
1326  ss << " - pred: ";
// pred is dumped before the row is reconstructed because pReco aliases pPred
1327  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1328  {
1329  ss << pPred[ uiX ] << ", ";
1330  }
1331  }
1332  if (bDebugResi)
1333  {
1334  ss << " - resi: ";
1335  }
1336  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1337  {
1338  if (bDebugResi)
1339  {
1340  ss << pResi[ uiX ] << ", ";
1341  }
1342  pReco [ uiX ] = Pel(ClipBD<Int>( Int(pPred[uiX]) + Int(pResi[uiX]), clipbd ));
1343  pRecQt [ uiX ] = pReco[ uiX ];
1344  pRecIPred[ uiX ] = pReco[ uiX ];
1345  }
1346  if (bDebugReco)
1347  {
1348  ss << " - reco: ";
1349  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1350  {
1351  ss << pReco[ uiX ] << ", ";
1352  }
1353  }
1354  pPred += uiStride;
1355  pResi += uiStride;
1356  pReco += uiStride;
1357  pRecQt += uiRecQtStride;
1358  pRecIPred += uiRecIPredStride;
1359  ss << "\n";
1360  }
1361  DEBUG_STRING_APPEND(sDebug, ss.str())
1362  }
1363  else
1364 #endif
1365  {
1366 
1367  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1368  {
1369  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1370  {
1371  pReco [ uiX ] = Pel(ClipBD<Int>( Int(pPred[uiX]) + Int(pResi[uiX]), clipbd ));
1372  pRecQt [ uiX ] = pReco[ uiX ];
1373  pRecIPred[ uiX ] = pReco[ uiX ];
1374  }
1375  pPred += uiStride;
1376  pResi += uiStride;
1377  pReco += uiStride;
1378  pRecQt += uiRecQtStride;
1379  pRecIPred += uiRecIPredStride;
1380  }
1381  }
1382  }
1383 
1384  //===== update distortion =====
// distortion between the reconstruction and the original is added to the caller's total
1385  ruiDist += m_pcRdCost->getDistPart( g_bitDepth[chType], piReco, uiStride, piOrg, uiStride, uiWidth, uiHeight, compID );
1386 }
1387 
1388 
1389 
1390 
// Recursive rate-distortion decision for the luma transform tree of rTu.
// Where allowed, the block is coded with a single transform (bCheckFull, optionally
// trying transform skip as a second mode) and/or split into child TUs (bCheckSplit);
// the option with the lower cost from m_pcRdCost->calcRdCost is kept. Distortion and
// cost of the kept option are added to ruiDistY and dRDCost.
//
// NOTE(review): the embedded listing numbers skip 1392, 1397, 1421, 1461, 1471,
// 1522-1523, 1526, 1553, 1559, 1567, 1593, 1613-1614, 1618, 1652, 1666 and 1683, so
// the line with the function name / first parameter, a preprocessor line, several
// conditions and several statements are not visible. Empty brace pairs and comments
// with no statement after them mark those places; pcOrgYuv is used in the body and is
// presumably the first parameter. Confirm all of this against the complete file.
//
// Parameters visible below:
//   pcPredYuv, pcResiYuv  prediction / residual buffers handed on to xIntraCodingTUBlock
//   resiLuma              per-type luma residual buffers updated with the kept option
//   ruiDistY              accumulator for luma distortion
//   bCheckFirst           only read under HHI_RQT_INTRA_SPEEDUP: together with
//                         noSplitIntraMaxTuSize it disables the split check
//   dRDCost               accumulator for RD cost
//   rTu                   transform unit (sub-tree root) being decided
1391 Void
1393  TComYuv* pcPredYuv,
1394  TComYuv* pcResiYuv,
1395  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],
1396  Distortion& ruiDistY,
1398  Bool bCheckFirst,
1399 #endif
1400  Double& dRDCost,
1401  TComTU& rTu
1402  DEBUG_STRING_FN_DECLARE(sDebug))
1403 {
1404  TComDataCU *pcCU = rTu.getCU();
1405  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1406  const UInt uiFullDepth = rTu.GetTransformDepthTotal();
1407  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1408  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
// full: the TU does not exceed the maximum transform size
// split: the TU is still larger than the minimum transform size for this CU
1409  Bool bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() );
1410  Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );
1411 
// local scratch residual buffers, one set per candidate, declared at every recursion level
1412  Pel resiLumaSplit [NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
1413  Pel resiLumaSingle[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
1414 
1415  Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
1416  for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)
1417  {
1418  bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
1419  }
1420 
// NOTE(review): listing line 1421 is missing; presumably it is the "specified otherwise"
// case that clears one of the bMaintainResidual entries -- confirm.
1422 
1423 #if HHI_RQT_INTRA_SPEEDUP
1424  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
1425  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);
1426  // don't check split if TU size is less or equal to max TU size
1427  Bool noSplitIntraMaxTuSize = bCheckFull;
1428  if(m_pcEncCfg->getRDpenalty() && ! isIntraSlice)
1429  {
1430  // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice
1431  noSplitIntraMaxTuSize = ( uiLog2TrSize <= min(maxTuSize,4) );
1432 
1433  // if maximum RD-penalty don't check TU size 32x32
1434  if(m_pcEncCfg->getRDpenalty()==2)
1435  {
1436  bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4));
1437  }
1438  }
1439  if( bCheckFirst && noSplitIntraMaxTuSize )
1440 
1441  {
1442  bCheckSplit = false;
1443  }
1444 #else
1445  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
1446  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);
1447  // if maximum RD-penalty don't check TU size 32x32
1448  if((m_pcEncCfg->getRDpenalty()==2) && !isIntraSlice)
1449  {
1450  bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4));
1451  }
1452 #endif
// MAX_DOUBLE so that the split candidate wins automatically when the full block is not tested
1453  Double dSingleCost = MAX_DOUBLE;
1454  Distortion uiSingleDistLuma = 0;
1455  UInt uiSingleCbfLuma = 0;
// transform skip is tried only if the PPS enables it, the luma rectangle qualifies for a
// transform-skip flag and the CU is not transquant-bypassed
1456  Bool checkTransformSkip = pcCU->getSlice()->getPPS()->getUseTransformSkip();
1457  Int bestModeId[MAX_NUM_COMPONENT] = { 0, 0, 0};
1458  checkTransformSkip &= TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(COMPONENT_Y), pcCU->getSlice()->getPPS()->getTransformSkipLog2MaxSize());
1459  checkTransformSkip &= (!pcCU->getCUTransquantBypass(0));
1460 
// NOTE(review): listing line 1461 is missing; the block below (restricting the check to
// SIZE_NxN partitions) is presumably guarded by a condition on that line -- confirm.
1462  {
1463  checkTransformSkip &= (pcCU->getPartitionSize(uiAbsPartIdx)==SIZE_NxN);
1464  }
1465 
1466  if( bCheckFull )
1467  {
1468  if(checkTransformSkip == true)
1469  {
1470  //----- store original entropy coding status -----
// NOTE(review): listing line 1471 (presumably the store statement announced above) is missing.
1472 
1473  Distortion singleDistTmpLuma = 0;
1474  UInt singleCbfTmpLuma = 0;
1475  Double singleCostTmp = 0;
1476  Int firstCheckId = 0;
1477 
// modeId is written to the CU as the transform-skip flag: 0 first, then 1
1478  for(Int modeId = firstCheckId; modeId < 2; modeId ++)
1479  {
1480  DEBUG_STRING_NEW(sModeString)
1481  Int default0Save1Load2 = 0;
1482  singleDistTmpLuma=0;
// the first mode saves its prediction, the second one reloads it
1483  if(modeId == firstCheckId)
1484  {
1485  default0Save1Load2 = 1;
1486  }
1487  else
1488  {
1489  default0Save1Load2 = 2;
1490  }
1491 
1492  if (rTu.ProcessComponentSection(COMPONENT_Y))
1493  {
1494  const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);
1495  pcCU->setTransformSkipSubParts ( modeId, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1496 
1497  xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 );
1498  }
1499  singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );
1500 
1501  //----- determine rate and r-d cost -----
1502  if(modeId == 1 && singleCbfTmpLuma == 0)
1503  {
1504  //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
1505  singleCostTmp = MAX_DOUBLE;
1506  }
1507  else
1508  {
1509  UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );
1510  singleCostTmp = m_pcRdCost->calcRdCost( uiSingleBits, singleDistTmpLuma );
1511  }
1512  if(singleCostTmp < dSingleCost)
1513  {
1514  DEBUG_STRING_SWAP(sDebug, sModeString)
1515  dSingleCost = singleCostTmp;
1516  uiSingleDistLuma = singleDistTmpLuma;
1517  uiSingleCbfLuma = singleCbfTmpLuma;
1518 
1519  bestModeId[COMPONENT_Y] = modeId;
1520  if(bestModeId[COMPONENT_Y] == firstCheckId)
1521  {
// NOTE(review): listing lines 1522-1523 are missing; presumably the result of the first
// mode is stashed here so that it can be restored if the second mode loses -- confirm.
1524  }
1525 
// NOTE(review): listing line 1526 is missing; the block below is presumably conditional.
1527  {
// copy the residuals of the currently best mode into the caller's buffers
1528  const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
1529  const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
1530  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
1531  {
1532  if (bMaintainResidual[storedResidualIndex])
1533  {
1534  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
1535  }
1536  }
1537  }
1538  }
// reload the coder state kept under CI_QT_TRAFO_ROOT before the second mode is tried
1539  if (modeId == firstCheckId)
1540  {
1541  m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
1542  }
1543  }
1544 
// write the winning transform-skip flag back to the CU
1545  if (rTu.ProcessComponentSection(COMPONENT_Y))
1546  {
1547  const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);
1548  pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1549  }
1550 
// the first mode won although the second was coded last: re-apply its CBF
1551  if(bestModeId[COMPONENT_Y] == firstCheckId)
1552  {
// NOTE(review): listing lines 1553 and 1559 are missing; presumably the stashed result and
// coder state of the first mode are restored around the CBF update -- confirm.
1554  if (rTu.ProcessComponentSection(COMPONENT_Y))
1555  {
1556  pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, rTu.GetTransformDepthTotalAdj(COMPONENT_Y) );
1557  }
1558 
1560  }
1561  }
1562  else
1563  {
1564  //----- store original entropy coding status -----
1565  if( bCheckSplit )
1566  {
// NOTE(review): listing line 1567 (presumably the store statement) is missing.
1568  }
1569  //----- code luma/chroma block with given intra prediction mode and store Cbf-----
1570  dSingleCost = 0.0;
1571  if (rTu.ProcessComponentSection(COMPONENT_Y))
1572  {
1573  const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);
1574  pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1575  }
1576 
1577  xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug));
1578 
// the CBF is only remembered when a split will also be tried (it is rewritten if single wins)
1579  if( bCheckSplit )
1580  {
1581  uiSingleCbfLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );
1582  }
1583  //----- determine rate and r-d cost -----
1584  UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );
1585 
// RD penalty: the bit count of a 32x32 TU (log2 size 5) is multiplied by 4 in non-intra slices
1586  if(m_pcEncCfg->getRDpenalty() && (uiLog2TrSize==5) && !isIntraSlice)
1587  {
1588  uiSingleBits=uiSingleBits*4;
1589  }
1590 
1591  dSingleCost = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDistLuma );
1592 
// NOTE(review): listing line 1593 is missing; the block below is presumably conditional.
1594  {
1595  const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
1596  const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
1597  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
1598  {
1599  if (bMaintainResidual[storedResidualIndex])
1600  {
1601  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
1602  }
1603  }
1604  }
1605  }
1606  }
1607 
1608  if( bCheckSplit )
1609  {
1610  //----- store full entropy coding status, load original entropy coding status -----
// NOTE(review): listing lines 1613-1614 and 1618 (the statements of both branches) are missing.
1611  if( bCheckFull )
1612  {
1615  }
1616  else
1617  {
1619  }
1620  //----- code splitted block -----
1621  Double dSplitCost = 0.0;
1622  Distortion uiSplitDistLuma = 0;
1623  UInt uiSplitCbfLuma = 0;
1624 
// each child adds to uiSplitDistLuma / dSplitCost; the CBFs of the children are OR-ed together
1625  TComTURecurse tuRecurseChild(rTu, false);
1626  DEBUG_STRING_NEW(sSplit)
1627  do
1628  {
1629  DEBUG_STRING_NEW(sChild)
1630 #if HHI_RQT_INTRA_SPEEDUP
1631  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
1632 #else
1633  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
1634 #endif
1635  DEBUG_STRING_APPEND(sSplit, sChild)
1636  uiSplitCbfLuma |= pcCU->getCbf( tuRecurseChild.GetAbsPartIdxTU(), COMPONENT_Y, tuRecurseChild.GetTransformDepthRel() );
1637  } while (tuRecurseChild.nextSection(rTu) );
1638 
1639  UInt uiPartsDiv = rTu.GetAbsPartIdxNumParts();
1640  {
// if any child has coded coefficients, set the CBF bit of this depth on every partition of the TU
1641  if (uiSplitCbfLuma)
1642  {
1643  const UInt flag=1<<uiTrDepth;
1644  UChar *pBase=pcCU->getCbf( COMPONENT_Y );
1645  for( UInt uiOffs = 0; uiOffs < uiPartsDiv; uiOffs++ )
1646  {
1647  pBase[ uiAbsPartIdx + uiOffs ] |= flag;
1648  }
1649  }
1650  }
1651  //----- restore context states -----
// NOTE(review): listing line 1652 (presumably the restoring statement) is missing.
1653 
1654  //----- determine rate and r-d cost -----
// the cost accumulated by the children is replaced by a cost computed from the bits of
// the whole split tree and the summed distortion
1655  UInt uiSplitBits = xGetIntraBitsQT( rTu, true, false, false );
1656  dSplitCost = m_pcRdCost->calcRdCost( uiSplitBits, uiSplitDistLuma );
1657 
1658  //===== compare and set best =====
1659  if( dSplitCost < dSingleCost )
1660  {
1661  //--- update cost ---
1662  DEBUG_STRING_SWAP(sSplit, sDebug)
1663  ruiDistY += uiSplitDistLuma;
1664  dRDCost += dSplitCost;
1665 
// NOTE(review): listing line 1666 is missing; the block below is presumably conditional.
1667  {
1668  const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
1669  const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
1670  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
1671  {
1672  if (bMaintainResidual[storedResidualIndex])
1673  {
1674  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSplit[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
1675  }
1676  }
1677  }
1678 
// split wins: the state left behind by the children is kept as is
1679  return;
1680  }
1681 
// single transform wins: put back everything the split attempt has overwritten
1682  //----- set entropy coding status -----
// NOTE(review): listing line 1683 (presumably the statement setting the coder state) is missing.
1684 
1685  //--- set transform index and Cbf values ---
1686  pcCU->setTrIdxSubParts( uiTrDepth, uiAbsPartIdx, uiFullDepth );
1687  const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y);
1688  const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);
1689  pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1690  pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1691 
1692  //--- set reconstruction for next intra prediction blocks ---
// copy the reconstruction of the single-transform candidate from the QT temporary of this
// layer back into the picture reconstruction buffer
1693  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1694  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
1695  const UInt uiWidth = tuRect.width;
1696  const UInt uiHeight = tuRect.height;
1697  Pel* piSrc = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( COMPONENT_Y, uiAbsPartIdx );
1698  UInt uiSrcStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride ( COMPONENT_Y );
1699  Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
1700  UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride ( COMPONENT_Y );
1701 
1702  for( UInt uiY = 0; uiY < uiHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
1703  {
1704  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1705  {
1706  piDes[ uiX ] = piSrc[ uiX ];
1707  }
1708  }
1709  }
// reached when no split was tested or when the single transform won
1710  ruiDistY += uiSingleDistLuma;
1711  dRDCost += dSingleCost;
1712 }
1713 
1714 
1715 Void
1717 {
1718  TComDataCU *pcCU = rTu.getCU();
1719  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1720  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1721  UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1722  if( uiTrMode == uiTrDepth )
1723  {
1724  UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1725  UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1726 
1727  //===== copy transform coefficients =====
1728 
1729  const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y);
1730  const UInt coeffOffset = rTu.getCoefficientOffset(COMPONENT_Y);
1731  const UInt numCoeffInBlock = tuRect.width * tuRect.height;
1732 
1733  if (numCoeffInBlock!=0)
1734  {
1735  const TCoeff* srcCoeff = m_ppcQTTempCoeff[COMPONENT_Y][uiQTLayer] + coeffOffset;
1736  TCoeff* destCoeff = pcCU->getCoeff(COMPONENT_Y) + coeffOffset;
1737  ::memcpy( destCoeff, srcCoeff, sizeof(TCoeff)*numCoeffInBlock );
1738 #if ADAPTIVE_QP_SELECTION
1739  const TCoeff* srcArlCoeff = m_ppcQTTempArlCoeff[COMPONENT_Y][ uiQTLayer ] + coeffOffset;
1740  TCoeff* destArlCoeff = pcCU->getArlCoeff (COMPONENT_Y) + coeffOffset;
1741  ::memcpy( destArlCoeff, srcArlCoeff, sizeof( TCoeff ) * numCoeffInBlock );
1742 #endif
1743  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Y, pcRecoYuv, uiAbsPartIdx, tuRect.width, tuRect.height );
1744  }
1745 
1746  }
1747  else
1748  {
1749  TComTURecurse tuRecurseChild(rTu, false);
1750  do
1751  {
1752  xSetIntraResultLumaQT( pcRecoYuv, tuRecurseChild );
1753  } while (tuRecurseChild.nextSection(rTu));
1754  }
1755 }
1756 
1757 
1758 Void
1760 {
1761  TComDataCU *pcCU=rTu.getCU();
1762  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1763  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1764  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1765  if ( compID==COMPONENT_Y || uiTrMode == uiTrDepth )
1766  {
1767  assert(uiTrMode == uiTrDepth);
1768  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1769  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1770 
1771  if (rTu.ProcessComponentSection(compID))
1772  {
1773  const TComRectangle &tuRect=rTu.getRect(compID);
1774 
1775  //===== copy transform coefficients =====
1776  const UInt uiNumCoeff = tuRect.width * tuRect.height;
1777  TCoeff* pcCoeffSrc = m_ppcQTTempCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1778  TCoeff* pcCoeffDst = m_pcQTTempTUCoeff[compID];
1779 
1780  ::memcpy( pcCoeffDst, pcCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1781 #if ADAPTIVE_QP_SELECTION
1782  TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1783  TCoeff* pcArlCoeffDst = m_ppcQTTempTUArlCoeff[compID];
1784  ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1785 #endif
1786  //===== copy reconstruction =====
1787  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( compID, &m_pcQTTempTransformSkipTComYuv, uiAbsPartIdx, tuRect.width, tuRect.height );
1788  }
1789  }
1790 }
1791 
1792 
1793 Void
1795 {
1796  TComDataCU *pcCU=rTu.getCU();
1797  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1798  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1799  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1800  if ( compID==COMPONENT_Y || uiTrMode == uiTrDepth )
1801  {
1802  assert(uiTrMode == uiTrDepth);
1803  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1804  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1805  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
1806 
1807  if (rTu.ProcessComponentSection(compID))
1808  {
1809  const TComRectangle &tuRect=rTu.getRect(compID);
1810 
1811  //===== copy transform coefficients =====
1812  const UInt uiNumCoeff = tuRect.width * tuRect.height;
1813  TCoeff* pcCoeffDst = m_ppcQTTempCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1814  TCoeff* pcCoeffSrc = m_pcQTTempTUCoeff[compID];
1815 
1816  ::memcpy( pcCoeffDst, pcCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1817 #if ADAPTIVE_QP_SELECTION
1818  TCoeff* pcArlCoeffDst = m_ppcQTTempArlCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1819  TCoeff* pcArlCoeffSrc = m_ppcQTTempTUArlCoeff[compID];
1820  ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1821 #endif
1822  //===== copy reconstruction =====
1823  m_pcQTTempTransformSkipTComYuv.copyPartToPartComponent( compID, &m_pcQTTempTComYuv[ uiQTLayer ], uiAbsPartIdx, tuRect.width, tuRect.height );
1824 
1825  Pel* piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder );
1826  UInt uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride (compID);
1827  Pel* piRecQt = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( compID, uiAbsPartIdx );
1828  UInt uiRecQtStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride (compID);
1829  UInt uiWidth = tuRect.width;
1830  UInt uiHeight = tuRect.height;
1831  Pel* pRecQt = piRecQt;
1832  Pel* pRecIPred = piRecIPred;
1833  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1834  {
1835  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1836  {
1837  pRecIPred[ uiX ] = pRecQt [ uiX ];
1838  }
1839  pRecQt += uiRecQtStride;
1840  pRecIPred += uiRecIPredStride;
1841  }
1842  }
1843  }
1844 }
1845 
1846 Void
1848  const Pel *pResiSrc,
1849  TComTU &rTu,
1850  const Int xOffset,
1851  const Int yOffset,
1852  const Int strideDst,
1853  const Int strideSrc )
1854 {
1855  const Pel *pSrc = pResiSrc + yOffset * strideSrc + xOffset;
1856  Pel *pDst = pResiDst + yOffset * strideDst + xOffset;
1857 
1858  for( Int y = 0; y < rTu.getRect( COMPONENT_Y ).height; y++ )
1859  {
1860  ::memcpy( pDst, pSrc, sizeof(Pel) * rTu.getRect( COMPONENT_Y ).width );
1861  pDst += strideDst;
1862  pSrc += strideSrc;
1863  }
1864 }
1865 
1866 Char
1868  const ComponentID compID,
1869  const Pel* piResiL,
1870  const Pel* piResiC,
1871  const Int width,
1872  const Int height,
1873  const Int strideL,
1874  const Int strideC )
1875 {
1876  const Pel *pResiL = piResiL;
1877  const Pel *pResiC = piResiC;
1878 
1879  TComDataCU *pCU = rTu.getCU();
1880  const Int absPartIdx = rTu.GetAbsPartIdxTU( compID );
1881  const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
1882 
1883  Char alpha = 0;
1884  Int SSxy = 0;
1885  Int SSxx = 0;
1886 
1887  for( UInt uiY = 0; uiY < height; uiY++ )
1888  {
1889  for( UInt uiX = 0; uiX < width; uiX++ )
1890  {
1891  const Pel scaledResiL = rightShift( pResiL[ uiX ], diffBitDepth );
1892  SSxy += ( scaledResiL * pResiC[ uiX ] );
1893  SSxx += ( scaledResiL * scaledResiL );
1894  }
1895 
1896  pResiL += strideL;
1897  pResiC += strideC;
1898  }
1899 
1900  if( SSxx != 0 )
1901  {
1902  Double dAlpha = SSxy / Double( SSxx );
1903  alpha = Char(Clip3<Int>(-16, 16, (Int)(dAlpha * 16)));
1904 
1905  static const Char alphaQuant[17] = {0, 1, 1, 2, 2, 2, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8};
1906 
1907  alpha = (alpha < 0) ? -alphaQuant[Int(-alpha)] : alphaQuant[Int(alpha)];
1908  }
1909  pCU->setCrossComponentPredictionAlphaPartRange( alpha, compID, absPartIdx, rTu.GetAbsPartIdxNumParts( compID ) );
1910 
1911  return alpha;
1912 }
1913 
1914 Void
1916  TComYuv* pcPredYuv,
1917  TComYuv* pcResiYuv,
1918  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],
1919  Distortion& ruiDist,
1920  TComTU& rTu
1921  DEBUG_STRING_FN_DECLARE(sDebug))
1922 {
1923  TComDataCU *pcCU = rTu.getCU();
1924  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1925  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1926  const ChromaFormat format = rTu.GetChromaFormat();
1927  UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1928  const UInt numberValidComponents = getNumberValidComponents(format);
1929 
1930  if( uiTrMode == uiTrDepth )
1931  {
1932  if (!rTu.ProcessChannelSection(CHANNEL_TYPE_CHROMA))
1933  {
1934  return;
1935  }
1936 
1937  const UInt uiFullDepth = rTu.GetTransformDepthTotal();
1938 
1939  Bool checkTransformSkip = pcCU->getSlice()->getPPS()->getUseTransformSkip();
1940  checkTransformSkip &= TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(COMPONENT_Cb), pcCU->getSlice()->getPPS()->getTransformSkipLog2MaxSize());
1941 
1943  {
1944  checkTransformSkip &= TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(COMPONENT_Y), pcCU->getSlice()->getPPS()->getTransformSkipLog2MaxSize());
1945 
1946  if (checkTransformSkip)
1947  {
1948  Int nbLumaSkip = 0;
1949  const UInt maxAbsPartIdxSub=uiAbsPartIdx + (rTu.ProcessingAllQuadrants(COMPONENT_Cb)?1:4);
1950  for(UInt absPartIdxSub = uiAbsPartIdx; absPartIdxSub < maxAbsPartIdxSub; absPartIdxSub ++)
1951  {
1952  nbLumaSkip += pcCU->getTransformSkip(absPartIdxSub, COMPONENT_Y);
1953  }
1954  checkTransformSkip &= (nbLumaSkip > 0);
1955  }
1956  }
1957 
1958 
1959  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
1960  {
1961  const ComponentID compID = ComponentID(ch);
1962  DEBUG_STRING_NEW(sDebugBestMode)
1963 
1964  //use RDO to decide whether Cr/Cb takes TS
1966 
1967  const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height;
1968 
1969  TComTURecurse TUIterator(rTu, false, (splitIntoSubTUs ? TComTU::VERTICAL_SPLIT : TComTU::DONT_SPLIT), true, compID);
1970 
1971  const UInt partIdxesPerSubTU = TUIterator.GetAbsPartIdxNumParts(compID);
1972 
1973  do
1974  {
1975  const UInt subTUAbsPartIdx = TUIterator.GetAbsPartIdxTU(compID);
1976 
1977  Double dSingleCost = MAX_DOUBLE;
1978  Int bestModeId = 0;
1979  Distortion singleDistC = 0;
1980  UInt singleCbfC = 0;
1981  Distortion singleDistCTmp = 0;
1982  Double singleCostTmp = 0;
1983  UInt singleCbfCTmp = 0;
1984  Char bestCrossCPredictionAlpha = 0;
1985  Int bestTransformSkipMode = 0;
1986 
1987  const Bool checkCrossComponentPrediction = (pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, subTUAbsPartIdx) == DM_CHROMA_IDX)
1989  && (pcCU->getCbf(subTUAbsPartIdx, COMPONENT_Y, uiTrDepth) != 0);
1990 
1991  const Int crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1;
1992  const Int transformSkipModesToTest = checkTransformSkip ? 2 : 1;
1993  const Int totalModesToTest = crossCPredictionModesToTest * transformSkipModesToTest;
1994  Int currModeId = 0;
1995  Int default0Save1Load2 = 0;
1996 
1997  for(Int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++)
1998  {
1999  for(Int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
2000  {
2001  pcCU->setCrossComponentPredictionAlphaPartRange(0, compID, subTUAbsPartIdx, partIdxesPerSubTU);
2002  DEBUG_STRING_NEW(sDebugMode)
2003  pcCU->setTransformSkipPartRange( transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2004  currModeId++;
2005 
2006  const Bool isOneMode = (totalModesToTest == 1);
2007  const Bool isLastMode = (currModeId == totalModesToTest); // currModeId is indexed from 1
2008 
2009  if (isOneMode)
2010  {
2011  default0Save1Load2 = 0;
2012  }
2013  else if (!isOneMode && (transformSkipModeId == 0) && (crossCPredictionModeId == 0))
2014  {
2015  default0Save1Load2 = 1; //save prediction on first mode
2016  }
2017  else
2018  {
2019  default0Save1Load2 = 2; //load it on subsequent modes
2020  }
2021 
2022  singleDistCTmp = 0;
2023 
2024  xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, (crossCPredictionModeId != 0), singleDistCTmp, compID, TUIterator DEBUG_STRING_PASS_INTO(sDebugMode), default0Save1Load2);
2025  singleCbfCTmp = pcCU->getCbf( subTUAbsPartIdx, compID, uiTrDepth);
2026 
2027  if ( ((crossCPredictionModeId == 1) && (pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) == 0))
2028  || ((transformSkipModeId == 1) && (singleCbfCTmp == 0))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
2029  {
2030  singleCostTmp = MAX_DOUBLE;
2031  }
2032  else if (!isOneMode)
2033  {
2034  UInt bitsTmp = xGetIntraBitsQTChroma( TUIterator, compID, false );
2035  singleCostTmp = m_pcRdCost->calcRdCost( bitsTmp, singleDistCTmp);
2036  }
2037 
2038  if(singleCostTmp < dSingleCost)
2039  {
2040  DEBUG_STRING_SWAP(sDebugBestMode, sDebugMode)
2041  dSingleCost = singleCostTmp;
2042  singleDistC = singleDistCTmp;
2043  bestCrossCPredictionAlpha = (crossCPredictionModeId != 0) ? pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) : 0;
2044  bestTransformSkipMode = transformSkipModeId;
2045  bestModeId = currModeId;
2046  singleCbfC = singleCbfCTmp;
2047 
2048  if (!isOneMode && !isLastMode)
2049  {
2050  xStoreIntraResultQT(compID, TUIterator);
2052  }
2053  }
2054 
2055  if (!isOneMode && !isLastMode)
2056  {
2057  m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
2058  }
2059  }
2060  }
2061 
2062  if(bestModeId < totalModesToTest)
2063  {
2064  xLoadIntraResultQT(compID, TUIterator);
2065  pcCU->setCbfPartRange( singleCbfC << uiTrDepth, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2066 
2068  }
2069 
2070  DEBUG_STRING_APPEND(sDebug, sDebugBestMode)
2071  pcCU ->setTransformSkipPartRange ( bestTransformSkipMode, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2072  pcCU ->setCrossComponentPredictionAlphaPartRange( bestCrossCPredictionAlpha, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2073  ruiDist += singleDistC;
2074  } while (TUIterator.nextSection(rTu));
2075 
2076  if (splitIntoSubTUs)
2077  {
2078  offsetSubTUCBFs(rTu, compID);
2079  }
2080  }
2081  }
2082  else
2083  {
2084  UInt uiSplitCbf[MAX_NUM_COMPONENT] = {0,0,0};
2085 
2086  TComTURecurse tuRecurseChild(rTu, false);
2087  const UInt uiTrDepthChild = tuRecurseChild.GetTransformDepthRel();
2088  do
2089  {
2090  DEBUG_STRING_NEW(sChild)
2091 
2092  xRecurIntraChromaCodingQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, ruiDist, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
2093 
2094  DEBUG_STRING_APPEND(sDebug, sChild)
2095  const UInt uiAbsPartIdxSub=tuRecurseChild.GetAbsPartIdxTU();
2096 
2097  for(UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2098  {
2099  uiSplitCbf[ch] |= pcCU->getCbf( uiAbsPartIdxSub, ComponentID(ch), uiTrDepthChild );
2100  }
2101  } while ( tuRecurseChild.nextSection(rTu) );
2102 
2103 
2104  UInt uiPartsDiv = rTu.GetAbsPartIdxNumParts();
2105  for(UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2106  {
2107  if (uiSplitCbf[ch])
2108  {
2109  const UInt flag=1<<uiTrDepth;
2110  ComponentID compID=ComponentID(ch);
2111  UChar *pBase=pcCU->getCbf( compID );
2112  for( UInt uiOffs = 0; uiOffs < uiPartsDiv; uiOffs++ )
2113  {
2114  pBase[ uiAbsPartIdx + uiOffs ] |= flag;
2115  }
2116  }
2117  }
2118  }
2119 }
2120 
2121 
2122 
2123 
// Copies the stored chroma results of a transform unit out of the per-layer
// temporary buffers: the quantised coefficients go into the CU's coefficient
// arrays and the Cb/Cr reconstruction goes into pcRecoYuv. When the TU is split
// further (its transform index differs from the current relative depth) the
// routine recurses into each child section instead.
// NOTE(review): the listing numbering skips 2125 and 2127, so the line carrying
// the function name/parameters and the condition guarding the early return
// appear to be absent here -- confirm against the upstream file.
2124 Void
2126 {
2128  {
2129  return;
2130  }
2131  TComDataCU *pcCU=rTu.getCU();
2132  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
2133  const UInt uiTrDepth = rTu.GetTransformDepthRel();
2134  UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
 // Leaf of the transform tree: copy the data for this TU directly.
2135  if( uiTrMode == uiTrDepth )
2136  {
2137  UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
 // Index of the temporary-buffer layer, counted down from the maximum TU size.
2138  UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
2139 
2140  //===== copy transform coefficients =====
2141  const TComRectangle &tuRectCb=rTu.getRect(COMPONENT_Cb);
 // The Cb rectangle size and coefficient offset are reused for every chroma component below.
2142  UInt uiNumCoeffC = tuRectCb.width*tuRectCb.height;//( pcCU->getSlice()->getSPS()->getMaxCUWidth() * pcCU->getSlice()->getSPS()->getMaxCUHeight() ) >> ( uiFullDepth << 1 );
2143  const UInt offset = rTu.getCoefficientOffset(COMPONENT_Cb);
2144 
2145  const UInt numberValidComponents = getNumberValidComponents(rTu.GetChromaFormat());
2146  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2147  {
2148  const ComponentID component = ComponentID(ch);
2149  const TCoeff* src = m_ppcQTTempCoeff[component][uiQTLayer] + offset;//(uiNumCoeffIncC*uiAbsPartIdx);
2150  TCoeff* dest = pcCU->getCoeff(component) + offset;//(uiNumCoeffIncC*uiAbsPartIdx);
2151  ::memcpy( dest, src, sizeof(TCoeff)*uiNumCoeffC );
2152 #if ADAPTIVE_QP_SELECTION
 // Same copy for the ARL coefficient buffers, which exist only in this build configuration.
2153  TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[component][ uiQTLayer ] + offset;//( uiNumCoeffIncC * uiAbsPartIdx );
2154  TCoeff* pcArlCoeffDst = pcCU->getArlCoeff(component) + offset;//( uiNumCoeffIncC * uiAbsPartIdx );
2155  ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeffC );
2156 #endif
2157  }
2158 
2159  //===== copy reconstruction =====
2160 
 // Both chroma planes are copied with the Cb rectangle dimensions.
2161  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Cb, pcRecoYuv, uiAbsPartIdx, tuRectCb.width, tuRectCb.height );
2162  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Cr, pcRecoYuv, uiAbsPartIdx, tuRectCb.width, tuRectCb.height );
2163  }
2164  else
2165  {
 // Not a leaf: visit every child section of this TU.
2166  TComTURecurse tuRecurseChild(rTu, false);
2167  do
2168  {
2169  xSetIntraResultChromaQT( pcRecoYuv, tuRecurseChild );
2170  } while (tuRecurseChild.nextSection(rTu));
2171  }
2172 }
2173 
2174 
2175 
// Luma intra prediction-mode search for one CU.
// For every PU section of the CU the routine
//   1. optionally runs a fast first pass over all 35 modes, ranking each by
//      distortion of the prediction signal plus weighted mode bits and keeping
//      the best few in uiRdModeList (most-probable modes are appended when
//      FAST_UDI_USE_MPM is enabled);
//   2. evaluates the short-listed modes with xRecurIntraCodingLumaQT and keeps
//      the cheapest one, saving its transform indices, CBFs and transform-skip
//      flags in the m_puhQTTemp* buffers;
//   3. writes the saved state back into pcCU and, unless this is the last
//      section, copies the reconstruction into the picture's reconstruction
//      buffer so the next section can predict from it.
// On exit the CU's total distortion is set to the accumulated luma distortion.
// NOTE(review): the listing numbering skips 2177 (the line that should carry the
// function name and first parameter) and also 2203, 2209, 2213, 2358, 2387,
// 2445, 2463 and 2552; those lines appear to be absent from this listing --
// confirm against the upstream file before relying on the statements around them.
2176 Void
2178  TComYuv* pcOrgYuv,
2179  TComYuv* pcPredYuv,
2180  TComYuv* pcResiYuv,
2181  TComYuv* pcRecoYuv,
2182  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
2183  DEBUG_STRING_FN_DECLARE(sDebug))
2184 {
2185  const UInt uiDepth = pcCU->getDepth(0);
 // 2Nx2N CUs are handled as a single PU; any other partition size is split into four.
2186  const UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
2187  const UInt uiNumPU = 1<<(2*uiInitTrDepth);
2188  const UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
2189  const UInt uiWidthBit = pcCU->getIntraSizeIdx(0);
2190  const ChromaFormat chFmt = pcCU->getPic()->getChromaFormat();
2191  const UInt numberValidComponents = getNumberValidComponents(chFmt);
2192  Distortion uiOverallDistY = 0;
2193  UInt CandNum;
2194  Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ];
 // Per-PU scratch copy of the luma residual; copied into resiLuma only for a new best mode.
2195  Pel resiLumaPU[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
2196 
2197  Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
2198  for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)
2199  {
2200  bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
2201  }
2202 
2204 
2205  // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantisation divisor is 1.
 // NOTE(review): the alternative arm of each conditional expression below is not visible in this listing.
2206 #if FULL_NBIT
2207  const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
2208  sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)))
2210 #else
2211  const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
2212  sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0)))
2214 #endif
2215 
2216  //===== set QP and clear Cbf =====
2217  if ( pcCU->getSlice()->getPPS()->getUseDQP() == true)
2218  {
2219  pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
2220  }
2221  else
2222  {
2223  pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
2224  }
2225 
2226  //===== loop over partitions =====
2227  TComTURecurse tuRecurseCU(pcCU, 0);
2228  TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);
2229 
2230  do
2231  {
2232  const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU();
2233 // for( UInt uiPU = 0, uiPartOffset=0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
2234  //{
2235  //===== init pattern for luma prediction =====
2236  Bool bAboveAvail = false;
2237  Bool bLeftAvail = false;
2238  DEBUG_STRING_NEW(sTemp2)
2239 
2240  //===== determine set of modes to be tested (using prediction signal only) =====
2241  Int numModesAvailable = 35; //total number of Intra modes
2242  UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
 // Number of modes taken to the full RD pass, looked up by the CU's intra size index.
2243  Int numModesForFullRD = g_aucIntraModeNumFast[ uiWidthBit ];
2244 
2245  if (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y))
2246  {
2247  initAdiPatternChType( tuRecurseWithPU, bAboveAvail, bLeftAvail, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) );
2248  }
2249 
 // The fast pre-selection runs only when fewer than all modes go to full RD.
2250  Bool doFastSearch = (numModesForFullRD != numModesAvailable);
2251  if (doFastSearch)
2252  {
2253  assert(numModesForFullRD < numModesAvailable);
2254 
2255  for( Int i=0; i < numModesForFullRD; i++ )
2256  {
2257  CandCostList[ i ] = MAX_DOUBLE;
2258  }
2259  CandNum = 0;
2260 
2261  const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
2262  const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU();
2263 
2264  Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx );
2265  Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx );
2266  UInt uiStride = pcPredYuv->getStride( COMPONENT_Y );
2267  DistParam distParam;
 // Hadamard-based distortion is requested unless the CU is coded with transquant bypass.
2268  const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0;
2269  m_pcRdCost->setDistParam(distParam, g_bitDepth[CHANNEL_TYPE_LUMA], piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard);
2270  distParam.bApplyWeight = false;
 // First pass: predict with every mode and rank by distortion + mode bits * sqrtLambdaForFirstPass.
2271  for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
2272  {
2273  UInt uiMode = modeIdx;
2274  Distortion uiSad = 0;
2275 
2276  const Bool bUseFilter=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiMode, puRect.width, puRect.height, chFmt, pcCU->getSlice()->getSPS()->getDisableIntraReferenceSmoothing());
2277 
2278  predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bAboveAvail, bLeftAvail, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) );
2279 
2280  // use hadamard transform here
2281  uiSad+=distParam.DistFunc(&distParam);
2282 
2283  UInt iModeBits = 0;
2284 
2285  // NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
2286  iModeBits+=xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, uiInitTrDepth, CHANNEL_TYPE_LUMA );
2287 
2288  Double cost = (Double)uiSad + (Double)iModeBits * sqrtLambdaForFirstPass;
2289 
2290 #ifdef DEBUG_INTRA_SEARCH_COSTS
2291  std::cout << "1st pass mode " << uiMode << " SAD = " << uiSad << ", mode bits = " << iModeBits << ", cost = " << cost << "\n";
2292 #endif
2293 
2294  CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
2295  }
2296 
2297 #if FAST_UDI_USE_MPM
 // Make sure every most-probable mode is in the RD list, appending any that are missing.
2298  Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1};
2299 
2300  Int iMode = -1;
2301  pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode );
2302 
 // A non-negative iMode is used as the number of predictors to check; otherwise all of them are checked.
2303  const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES);
2304 
2305  for( Int j=0; j < numCand; j++)
2306  {
2307  Bool mostProbableModeIncluded = false;
2308  Int mostProbableMode = uiPreds[j];
2309 
2310  for( Int i=0; i < numModesForFullRD; i++)
2311  {
2312  mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
2313  }
2314  if (!mostProbableModeIncluded)
2315  {
2316  uiRdModeList[numModesForFullRD++] = mostProbableMode;
2317  }
2318  }
2319 #endif // FAST_UDI_USE_MPM
2320  }
2321  else
2322  {
 // No pre-selection: test modes 0 .. numModesForFullRD-1 in order.
2323  for( Int i=0; i < numModesForFullRD; i++)
2324  {
2325  uiRdModeList[i] = i;
2326  }
2327  }
2328 
2329  //===== check modes (using r-d costs) =====
2330 #if HHI_RQT_INTRA_SPEEDUP_MOD
2331  UInt uiSecondBestMode = MAX_UINT;
2332  Double dSecondBestPUCost = MAX_DOUBLE;
2333 #endif
2334  DEBUG_STRING_NEW(sPU)
2335  UInt uiBestPUMode = 0;
2336  Distortion uiBestPUDistY = 0;
2337  Double dBestPUCost = MAX_DOUBLE;
2338 
2339 #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
2340  UInt max=numModesForFullRD;
2341 
2342  if (DebugOptionList::ForceLumaMode.isSet())
2343  {
2344  max=0; // we are forcing a direction, so don't bother with mode check
2345  }
2346  for ( UInt uiMode = 0; uiMode < max; uiMode++)
2347 #else
2348  for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )
2349 #endif
2350  {
2351  // set luma prediction mode
2352  UInt uiOrgMode = uiRdModeList[uiMode];
2353 
2354  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
2355 
2356  DEBUG_STRING_NEW(sMode)
2357  // set context models
2359 
2360  // determine residual for partition
2361  Distortion uiPUDistY = 0;
2362  Double dPUCost = 0.0;
2363 #if HHI_RQT_INTRA_SPEEDUP
2364  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
2365 #else
2366  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
2367 #endif
2368 
2369 #ifdef DEBUG_INTRA_SEARCH_COSTS
2370  std::cout << "2nd pass [luma,chroma] mode [" << Int(pcCU->getIntraDir(CHANNEL_TYPE_LUMA, uiPartOffset)) << "," << Int(pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, uiPartOffset)) << "] cost = " << dPUCost << "\n";
2371 #endif
2372 
2373  // check r-d cost
2374  if( dPUCost < dBestPUCost )
2375  {
2376  DEBUG_STRING_SWAP(sPU, sMode)
2377 #if HHI_RQT_INTRA_SPEEDUP_MOD
 // The previous best is demoted to second best.
2378  uiSecondBestMode = uiBestPUMode;
2379  dSecondBestPUCost = dBestPUCost;
2380 #endif
2381  uiBestPUMode = uiOrgMode;
2382  uiBestPUDistY = uiPUDistY;
2383  dBestPUCost = dPUCost;
2384 
2385  xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
2386 
 // NOTE(review): the statement introducing the following brace block (line 2387) is not visible in this listing.
2388  {
2389  const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
2390  const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
2391  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
2392  {
2393  if (bMaintainResidual[storedResidualIndex])
2394  {
2395  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
2396  }
2397  }
2398  }
2399 
2400  UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
2401 
 // Snapshot this mode's transform indices, CBFs and transform-skip flags, since later candidates overwrite the CU arrays.
2402  ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2403  for (UInt component = 0; component < numberValidComponents; component++)
2404  {
2405  const ComponentID compID = ComponentID(component);
2406  ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2407  ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2408  }
2409  }
2410 #if HHI_RQT_INTRA_SPEEDUP_MOD
2411  else if( dPUCost < dSecondBestPUCost )
2412  {
2413  uiSecondBestMode = uiOrgMode;
2414  dSecondBestPUCost = dPUCost;
2415  }
2416 #endif
2417  } // Mode loop
2418 
2419 #if HHI_RQT_INTRA_SPEEDUP
 // Refinement: re-run the RD evaluation for the best mode (and the second best when
 // HHI_RQT_INTRA_SPEEDUP_MOD is set), this time passing false for the flag that was true above.
2420 #if HHI_RQT_INTRA_SPEEDUP_MOD
2421  for( UInt ui =0; ui < 2; ++ui )
2422 #endif
2423  {
2424 #if HHI_RQT_INTRA_SPEEDUP_MOD
2425  UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode;
 // MAX_UINT is the initial value of uiSecondBestMode, i.e. no second-best mode was recorded.
2426  if( uiOrgMode == MAX_UINT )
2427  {
2428  break;
2429  }
2430 #else
2431  UInt uiOrgMode = uiBestPUMode;
2432 #endif
2433 
2434 #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
2435  if (DebugOptionList::ForceLumaMode.isSet())
2436  {
2437  uiOrgMode = DebugOptionList::ForceLumaMode.getInt();
2438  }
2439 #endif
2440 
2441  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
2442  DEBUG_STRING_NEW(sModeTree)
2443 
2444  // set context models
2446 
2447  // determine residual for partition
2448  Distortion uiPUDistY = 0;
2449  Double dPUCost = 0.0;
2450 
2451  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree));
2452 
2453  // check r-d cost
2454  if( dPUCost < dBestPUCost )
2455  {
2456  DEBUG_STRING_SWAP(sPU, sModeTree)
2457  uiBestPUMode = uiOrgMode;
2458  uiBestPUDistY = uiPUDistY;
2459  dBestPUCost = dPUCost;
2460 
2461  xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
2462 
 // NOTE(review): the statement introducing the following brace block (line 2463) is not visible in this listing.
2464  {
2465  const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
2466  const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
2467  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
2468  {
2469  if (bMaintainResidual[storedResidualIndex])
2470  {
2471  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
2472  }
2473  }
2474  }
2475 
2476  const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
2477  ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2478 
2479  for (UInt component = 0; component < numberValidComponents; component++)
2480  {
2481  const ComponentID compID = ComponentID(component);
2482  ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2483  ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2484  }
2485  }
2486  } // Mode loop
2487 #endif
2488 
2489  DEBUG_STRING_APPEND(sDebug, sPU)
2490 
2491  //--- update overall distortion ---
2492  uiOverallDistY += uiBestPUDistY;
2493 
2494  //--- update transform index and cbf ---
 // Restore the snapshot taken for the winning mode into the CU arrays.
2495  const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
2496  ::memcpy( pcCU->getTransformIdx() + uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) );
2497  for (UInt component = 0; component < numberValidComponents; component++)
2498  {
2499  const ComponentID compID = ComponentID(component);
2500  ::memcpy( pcCU->getCbf( compID ) + uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
2501  ::memcpy( pcCU->getTransformSkip( compID ) + uiPartOffset, m_puhQTTempTransformSkipFlag[compID ], uiQPartNum * sizeof( UChar ) );
2502  }
2503 
2504  //--- set reconstruction for next intra prediction blocks ---
2505  if( !tuRecurseWithPU.IsLastSection() )
2506  {
2507  const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
2508  const UInt uiCompWidth = puRect.width;
2509  const UInt uiCompHeight = puRect.height;
2510 
2511  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiPartOffset;
2512  Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
2513  const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y);
2514  const Pel* piSrc = pcRecoYuv->getAddr( COMPONENT_Y, uiPartOffset );
2515  const UInt uiSrcStride = pcRecoYuv->getStride( COMPONENT_Y);
2516 
 // Row-by-row copy of the PU's luma reconstruction into the picture buffer.
2517  for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
2518  {
2519  for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
2520  {
2521  piDes[ uiX ] = piSrc[ uiX ];
2522  }
2523  }
2524  }
2525 
2526  //=== update PU data ====
2527  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth );
2528  } while (tuRecurseWithPU.nextSection(tuRecurseCU));
2529 
2530 
2531  if( uiNumPU > 1 )
2532  { // set Cbf for all blocks
 // OR together the depth-1 CBF of the four quarters, then OR the result into every partition's CBF entry.
2533  UInt uiCombCbfY = 0;
2534  UInt uiCombCbfU = 0;
2535  UInt uiCombCbfV = 0;
2536  UInt uiPartIdx = 0;
2537  for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
2538  {
2539  uiCombCbfY |= pcCU->getCbf( uiPartIdx, COMPONENT_Y, 1 );
2540  uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
2541  uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
2542  }
2543  for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
2544  {
2545  pcCU->getCbf( COMPONENT_Y )[ uiOffs ] |= uiCombCbfY;
2546  pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
2547  pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
2548  }
2549  }
2550 
2551  //===== reset context models =====
2553 
2554  //===== set distortion (rate and r-d costs are determined later) =====
2555  pcCU->getTotalDistortion() = uiOverallDistY;
2556 }
2557 
2558 
2559 
2560 
// Chroma intra prediction-mode search for one CU.
// For each chroma section of the CU, every allowed chroma mode is coded with
// xRecurIntraChromaCodingQT, its RD cost is computed from the returned
// distortion and the bits reported by xGetIntraBitsQT, and the cheapest mode is
// kept. The winner's CBFs, transform-skip flags and cross-component prediction
// alphas are restored into pcCU, its distortion is added to the CU's total
// distortion, and (unless this is the last section) its reconstruction is
// copied into the picture's reconstruction buffer.
// NOTE(review): the listing numbering skips 2562 (the line that should carry the
// function name and first parameter) as well as 2598, 2600, 2614 and 2710;
// those lines appear to be absent from this listing -- confirm against the
// upstream file.
2561 Void
2563  TComYuv* pcOrgYuv,
2564  TComYuv* pcPredYuv,
2565  TComYuv* pcResiYuv,
2566  TComYuv* pcRecoYuv,
2567  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
2568  DEBUG_STRING_FN_DECLARE(sDebug))
2569 {
 // Four chroma PUs are used only for non-2Nx2N CUs when the chroma format enables them.
2570  const UInt uiInitTrDepth = pcCU->getPartitionSize(0) != SIZE_2Nx2N && enable4ChromaPUsInIntraNxNCU(pcOrgYuv->getChromaFormat()) ? 1 : 0;
2571 
2572  TComTURecurse tuRecurseCU(pcCU, 0);
2573  TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);
2574  const UInt uiQNumParts = tuRecurseWithPU.GetAbsPartIdxNumParts();
2575  const UInt uiDepthCU=tuRecurseWithPU.getCUDepth();
2576  const UInt numberValidComponents = pcCU->getPic()->getNumberValidComponents();
2577 
2578  do
2579  {
2580  UInt uiBestMode = 0;
2581  Distortion uiBestDist = 0;
2582  Double dBestCost = MAX_DOUBLE;
2583 
2584  //----- init mode list -----
2585  if (tuRecurseWithPU.ProcessChannelSection(CHANNEL_TYPE_CHROMA))
2586  {
2587  UInt uiModeList[FAST_UDI_MAX_RDMODE_NUM];
2588  const UInt uiQPartNum = uiQNumParts;
2589  const UInt uiPartOffset = tuRecurseWithPU.GetAbsPartIdxTU();
2590  {
2591  UInt uiMinMode = 0;
2592  UInt uiMaxMode = NUM_CHROMA_MODE;
2593 
2594  //----- check chroma modes -----
2595  pcCU->getAllowedChromaDir( uiPartOffset, uiModeList );
2596 
2597 #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
 // NOTE(review): the condition and first statement of this debug-only block (lines 2598, 2600) are not visible in this listing.
2599  {
2601  if (uiModeList[uiMinMode]==34)
2602  {
2603  uiMinMode=4; // if the fixed mode has been renumbered because DM_CHROMA covers it, use DM_CHROMA.
2604  }
 // Restrict the search to the single selected mode.
2605  uiMaxMode=uiMinMode+1;
2606  }
2607 #endif
2608 
2609  DEBUG_STRING_NEW(sPU)
2610 
2611  for( UInt uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++ )
2612  {
2613  //----- restore context models -----
2615 
2616  DEBUG_STRING_NEW(sMode)
2617  //----- chroma coding -----
2618  Distortion uiDist = 0;
2619  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_CHROMA, uiModeList[uiMode], uiPartOffset, uiDepthCU+uiInitTrDepth );
2620  xRecurIntraChromaCodingQT ( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, uiDist, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
2621 
 // With transform skip enabled in the PPS, the coder state is reloaded from CI_CURR_BEST before bits are counted.
2622  if( pcCU->getSlice()->getPPS()->getUseTransformSkip() )
2623  {
2624  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepthCU][CI_CURR_BEST] );
2625  }
2626 
2627  UInt uiBits = xGetIntraBitsQT( tuRecurseWithPU, false, true, false );
2628  Double dCost = m_pcRdCost->calcRdCost( uiBits, uiDist );
2629 
2630  //----- compare -----
2631  if( dCost < dBestCost )
2632  {
2633  DEBUG_STRING_SWAP(sPU, sMode);
2634  dBestCost = dCost;
2635  uiBestDist = uiDist;
2636  uiBestMode = uiModeList[uiMode];
2637 
2638  xSetIntraResultChromaQT( pcRecoYuv, tuRecurseWithPU );
 // Snapshot the per-partition chroma flags of the new best mode; later candidates overwrite the CU arrays.
2639  for (UInt componentIndex = COMPONENT_Cb; componentIndex < numberValidComponents; componentIndex++)
2640  {
2641  const ComponentID compID = ComponentID(componentIndex);
2642  ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID )+uiPartOffset, uiQPartNum * sizeof( UChar ) );
2643  ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip( compID )+uiPartOffset, uiQPartNum * sizeof( UChar ) );
2644  ::memcpy( m_phQTTempCrossComponentPredictionAlpha[compID], pcCU->getCrossComponentPredictionAlpha(compID)+uiPartOffset, uiQPartNum * sizeof( Char ) );
2645  }
2646  }
2647  }
2648 
2649  DEBUG_STRING_APPEND(sDebug, sPU)
2650 
2651  //----- set data -----
 // Restore the snapshot of the winning mode into the CU arrays.
2652  for (UInt componentIndex = COMPONENT_Cb; componentIndex < numberValidComponents; componentIndex++)
2653  {
2654  const ComponentID compID = ComponentID(componentIndex);
2655  ::memcpy( pcCU->getCbf( compID )+uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
2656  ::memcpy( pcCU->getTransformSkip( compID )+uiPartOffset, m_puhQTTempTransformSkipFlag[compID], uiQPartNum * sizeof( UChar ) );
2657  ::memcpy( pcCU->getCrossComponentPredictionAlpha(compID)+uiPartOffset, m_phQTTempCrossComponentPredictionAlpha[compID], uiQPartNum * sizeof( Char ) );
2658  }
2659  }
2660 
 // Publish the reconstruction to the picture buffer unless this is the last section.
2661  if( ! tuRecurseWithPU.IsLastSection() )
2662  {
2663  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2664  {
2665  const ComponentID compID = ComponentID(ch);
2666  const TComRectangle &tuRect = tuRecurseWithPU.getRect(compID);
2667  const UInt uiCompWidth = tuRect.width;
2668  const UInt uiCompHeight = tuRect.height;
2669  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + tuRecurseWithPU.GetAbsPartIdxTU();
2670  Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder );
2671  const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( compID);
2672  const Pel* piSrc = pcRecoYuv->getAddr( compID, uiPartOffset );
2673  const UInt uiSrcStride = pcRecoYuv->getStride( compID);
2674 
2675  for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
2676  {
2677  for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
2678  {
2679  piDes[ uiX ] = piSrc[ uiX ];
2680  }
2681  }
2682  }
2683  }
2684 
2685  pcCU->setIntraDirSubParts( CHANNEL_TYPE_CHROMA, uiBestMode, uiPartOffset, uiDepthCU+uiInitTrDepth );
2686  pcCU->getTotalDistortion () += uiBestDist;
2687  }
2688 
2689  } while (tuRecurseWithPU.nextSection(tuRecurseCU));
2690 
2691  //----- restore context models -----
2692 
2693  if( uiInitTrDepth != 0 )
2694  { // set Cbf for all blocks
 // OR together the depth-1 chroma CBF of the four sections, then OR the result into every partition's CBF entry.
2695  UInt uiCombCbfU = 0;
2696  UInt uiCombCbfV = 0;
2697  UInt uiPartIdx = 0;
2698  for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
2699  {
2700  uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
2701  uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
2702  }
2703  for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
2704  {
2705  pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
2706  pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
2707  }
2708  }
2709 
2711 }
2712 
2713 
2714 
2715 
2729 Void TEncSearch::xEncPCM (TComDataCU* pcCU, UInt uiAbsPartIdx, Pel* pOrg, Pel* pPCM, Pel* pPred, Pel* pResi, Pel* pReco, UInt uiStride, UInt uiWidth, UInt uiHeight, const ComponentID compID )
2730 {
2731  const UInt uiReconStride = pcCU->getPic()->getPicYuvRec()->getStride(compID);
2732  const UInt uiPCMBitDepth = pcCU->getSlice()->getSPS()->getPCMBitDepth(toChannelType(compID));
2733  Pel* pRecoPic = pcCU->getPic()->getPicYuvRec()->getAddr(compID, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu()+uiAbsPartIdx);
2734 
2735  const Int pcmShiftRight=(g_bitDepth[toChannelType(compID)] - Int(uiPCMBitDepth));
2736 
2737  assert(pcmShiftRight >= 0);
2738 
2739  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
2740  {
2741  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
2742  {
2743  // Reset pred and residual
2744  pPred[uiX] = 0;
2745  pResi[uiX] = 0;
2746  // Encode
2747  pPCM[uiX] = (pOrg[uiX]>>pcmShiftRight);
2748  // Reconstruction
2749  pReco [uiX] = (pPCM[uiX]<<(pcmShiftRight));
2750  pRecoPic[uiX] = pReco[uiX];
2751  }
2752  pPred += uiStride;
2753  pResi += uiStride;
2754  pPCM += uiWidth;
2755  pOrg += uiStride;
2756  pReco += uiStride;
2757  pRecoPic += uiReconStride;
2758  }
2759 }
2760 
2761 
// Evaluates coding the whole CU in PCM mode.
// Each valid component is PCM-coded with xEncPCM (which also fills the
// prediction, residual and reconstruction buffers passed in), the intra header
// is written with xEncIntraHeader, and the resulting bits, zero distortion and
// RD cost are stored in the CU before it is copied to the picture.
// NOTE(review): the listing numbering skips 2788, 2790 and 2794, and uiBits has
// no visible assignment before it is read -- the lines that reset/read the bit
// counter appear to be absent from this listing; confirm against the upstream file.
2763 Void TEncSearch::IPCMSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv )
2764 {
2765  UInt uiDepth = pcCU->getDepth(0);
 // The distortion recorded for the PCM candidate is fixed at zero.
2766  const UInt uiDistortion = 0;
2767  UInt uiBits;
2768 
2769  Double dCost;
2770 
2771  for (UInt ch=0; ch < pcCU->getPic()->getNumberValidComponents(); ch++)
2772  {
2773  const ComponentID compID = ComponentID(ch);
 // Component dimensions: CU size scaled down by the component's subsampling shifts.
2774  const UInt width = pcCU->getWidth(0) >> pcCU->getPic()->getComponentScaleX(compID);
2775  const UInt height = pcCU->getHeight(0) >> pcCU->getPic()->getComponentScaleY(compID);
 // The prediction buffer's stride is passed to xEncPCM for all sample buffers.
2776  const UInt stride = pcPredYuv->getStride(compID);
2777 
2778  Pel * pOrig = pcOrgYuv->getAddr (compID, 0, width);
2779  Pel * pResi = pcResiYuv->getAddr(compID, 0, width);
2780  Pel * pPred = pcPredYuv->getAddr(compID, 0, width);
2781  Pel * pReco = pcRecoYuv->getAddr(compID, 0, width);
2782  Pel * pPCM = pcCU->getPCMSample (compID);
2783 
2784  xEncPCM ( pcCU, 0, pOrig, pPCM, pPred, pResi, pReco, stride, width, height, compID );
2785 
2786  }
2787 
2789  xEncIntraHeader ( pcCU, uiDepth, 0, true, false);
2791 
2792  dCost = m_pcRdCost->calcRdCost( uiBits, uiDistortion );
2793 
2795 
 // Record the PCM candidate's statistics on the CU.
2796  pcCU->getTotalBits() = uiBits;
2797  pcCU->getTotalCost() = dCost;
2798  pcCU->getTotalDistortion() = uiDistortion;
2799 
2800  pcCU->copyToPic(uiDepth);
2801 }
2802 
2803 
2804 
2805 
// Computes the luma prediction error of one inter PU.
// The PU is motion-compensated into m_tmpYuvPred, a distortion descriptor is
// configured for the PU's size, and the result of its distortion function is
// returned through ruiErr. The bHadamard parameter is unnamed and unused; the
// Hadamard choice is taken from the encoder configuration instead and is
// disabled when getCUTransquantBypass(iPartIdx) is non-zero.
// NOTE(review): the listing numbering skips 2820 and 2822, so the opening of the
// call that the two argument lines below belong to is not visible here --
// confirm against the upstream file.
2806 Void TEncSearch::xGetInterPredictionError( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, Distortion& ruiErr, Bool /*bHadamard*/ )
2807 {
2808  motionCompensation( pcCU, &m_tmpYuvPred, REF_PIC_LIST_X, iPartIdx );
2809 
2810  UInt uiAbsPartIdx = 0;
2811  Int iWidth = 0;
2812  Int iHeight = 0;
 // Fills in the PU's partition address and dimensions.
2813  pcCU->getPartIndexAndSize( iPartIdx, uiAbsPartIdx, iWidth, iHeight );
2814 
2815  DistParam cDistParam;
2816 
2817  cDistParam.bApplyWeight = false;
2818 
2819 
2821  pcYuvOrg->getAddr( COMPONENT_Y, uiAbsPartIdx ), pcYuvOrg->getStride(COMPONENT_Y),
2823  iWidth, iHeight, m_pcEncCfg->getUseHADME() && (pcCU->getCUTransquantBypass(iPartIdx) == 0) );
2824 
2825  ruiErr = cDistParam.DistFunc( &cDistParam );
2826 }
2827 
2829 Void TEncSearch::xMergeEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPUIdx, UInt& uiInterDir, TComMvField* pacMvField, UInt& uiMergeIndex, Distortion& ruiCost, TComMvField* cMvFieldNeighbours, UChar* uhInterDirNeighbours, Int& numValidMergeCand )
2830 {
2831  UInt uiAbsPartIdx = 0;
2832  Int iWidth = 0;
2833  Int iHeight = 0;
2834 
2835  pcCU->getPartIndexAndSize( iPUIdx, uiAbsPartIdx, iWidth, iHeight );
2836  UInt uiDepth = pcCU->getDepth( uiAbsPartIdx );
2837 
2838  PartSize partSize = pcCU->getPartitionSize( 0 );
2839  if ( pcCU->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2() && partSize != SIZE_2Nx2N && pcCU->getWidth( 0 ) <= 8 )
2840  {
2841  if ( iPUIdx == 0 )
2842  {
2843  pcCU->setPartSizeSubParts( SIZE_2Nx2N, 0, uiDepth ); // temporarily set
2844  pcCU->getInterMergeCandidates( 0, 0, cMvFieldNeighbours,uhInterDirNeighbours, numValidMergeCand );
2845  pcCU->setPartSizeSubParts( partSize, 0, uiDepth ); // restore
2846  }
2847  }
2848  else
2849  {
2850  pcCU->getInterMergeCandidates( uiAbsPartIdx, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand );
2851  }
2852 
2853  xRestrictBipredMergeCand( pcCU, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand );
2854 
2855  ruiCost = std::numeric_limits<Distortion>::max();
2856  for( UInt uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand )
2857  {
2858  Distortion uiCostCand = std::numeric_limits<Distortion>::max();
2859  UInt uiBitsCand = 0;
2860 
2861  PartSize ePartSize = pcCU->getPartitionSize( 0 );
2862 
2863  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( cMvFieldNeighbours[0 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx );
2864  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( cMvFieldNeighbours[1 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx );
2865 
2866  xGetInterPredictionError( pcCU, pcYuvOrg, iPUIdx, uiCostCand, m_pcEncCfg->getUseHADME() );
2867  uiBitsCand = uiMergeCand + 1;
2868  if (uiMergeCand == m_pcEncCfg->getMaxNumMergeCand() -1)
2869  {
2870  uiBitsCand--;
2871  }
2872  uiCostCand = uiCostCand + m_pcRdCost->getCost( uiBitsCand );
2873  if ( uiCostCand < ruiCost )
2874  {
2875  ruiCost = uiCostCand;
2876  pacMvField[0] = cMvFieldNeighbours[0 + 2*uiMergeCand];
2877  pacMvField[1] = cMvFieldNeighbours[1 + 2*uiMergeCand];
2878  uiInterDir = uhInterDirNeighbours[uiMergeCand];
2879  uiMergeIndex = uiMergeCand;
2880  }
2881  }
2882 }
2883 
2892 Void TEncSearch::xRestrictBipredMergeCand( TComDataCU* pcCU, UInt puIdx, TComMvField* mvFieldNeighbours, UChar* interDirNeighbours, Int numValidMergeCand )
2893 {
2894  if ( pcCU->isBipredRestriction(puIdx) )
2895  {
2896  for( UInt mergeCand = 0; mergeCand < numValidMergeCand; ++mergeCand )
2897  {
2898  if ( interDirNeighbours[mergeCand] == 3 )
2899  {
2900  interDirNeighbours[mergeCand] = 1;
2901  mvFieldNeighbours[(mergeCand << 1) + 1].setMvField(TComMv(0,0), -1);
2902  }
2903  }
2904  }
2905 }
2906 
2908 #if AMP_MRG
2909 Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv DEBUG_STRING_FN_DECLARE(sDebug), Bool bUseRes, Bool bUseMRG )
2910 #else
2911 Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, Bool bUseRes )
2912 #endif
2913 {
2914  for(UInt i=0; i<NUM_REF_PIC_LIST_01; i++)
2915  {
2916  m_acYuvPred[i].clear();
2917  }
2919  pcPredYuv->clear();
2920 
2921  if ( !bUseRes )
2922  {
2923  pcResiYuv->clear();
2924  }
2925 
2926  pcRecoYuv->clear();
2927 
2928  TComMv cMvSrchRngLT;
2929  TComMv cMvSrchRngRB;
2930 
2931  TComMv cMvZero;
2932  TComMv TempMv; //kolya
2933 
2934  TComMv cMv[2];
2935  TComMv cMvBi[2];
2936  TComMv cMvTemp[2][33];
2937 
2938  Int iNumPart = pcCU->getNumPartitions();
2939  Int iNumPredDir = pcCU->getSlice()->isInterP() ? 1 : 2;
2940 
2941  TComMv cMvPred[2][33];
2942 
2943  TComMv cMvPredBi[2][33];
2944  Int aaiMvpIdxBi[2][33];
2945 
2946  Int aaiMvpIdx[2][33];
2947  Int aaiMvpNum[2][33];
2948 
2949  AMVPInfo aacAMVPInfo[2][33];
2950 
2951  Int iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
2952  Int iRefIdxBi[2];
2953 
2954  UInt uiPartAddr;
2955  Int iRoiWidth, iRoiHeight;
2956 
2957  UInt uiMbBits[3] = {1, 1, 0};
2958 
2959  UInt uiLastMode = 0;
2960  Int iRefStart, iRefEnd;
2961 
2962  PartSize ePartSize = pcCU->getPartitionSize( 0 );
2963 
2964  Int bestBiPRefIdxL1 = 0;
2965  Int bestBiPMvpL1 = 0;
2966  Distortion biPDistTemp = std::numeric_limits<Distortion>::max();
2967 
2968  TComMvField cMvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
2969  UChar uhInterDirNeighbours[MRG_MAX_NUM_CANDS];
2970  Int numValidMergeCand = 0 ;
2971 
2972  for ( Int iPartIdx = 0; iPartIdx < iNumPart; iPartIdx++ )
2973  {
2974  Distortion uiCost[2] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() };
2975  Distortion uiCostBi = std::numeric_limits<Distortion>::max();
2976  Distortion uiCostTemp;
2977 
2978  UInt uiBits[3];
2979  UInt uiBitsTemp;
2980  Distortion bestBiPDist = std::numeric_limits<Distortion>::max();
2981 
2982  Distortion uiCostTempL0[MAX_NUM_REF];
2983  for (Int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++)
2984  {
2985  uiCostTempL0[iNumRef] = std::numeric_limits<Distortion>::max();
2986  }
2987  UInt uiBitsTempL0[MAX_NUM_REF];
2988 
2989  TComMv mvValidList1;
2990  Int refIdxValidList1 = 0;
2991  UInt bitsValidList1 = MAX_UINT;
2992  Distortion costValidList1 = std::numeric_limits<Distortion>::max();
2993 
2994  xGetBlkBits( ePartSize, pcCU->getSlice()->isInterP(), iPartIdx, uiLastMode, uiMbBits);
2995 
2996  pcCU->getPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
2997 
2998 #if AMP_MRG
2999  Bool bTestNormalMC = true;
3000 
3001  if ( bUseMRG && pcCU->getWidth( 0 ) > 8 && iNumPart == 2 )
3002  {
3003  bTestNormalMC = false;
3004  }
3005 
3006  if (bTestNormalMC)
3007  {
3008 #endif
3009 
3010  // Uni-directional prediction
3011  for ( Int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
3012  {
3013  RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
3014 
3015  for ( Int iRefIdxTemp = 0; iRefIdxTemp < pcCU->getSlice()->getNumRefIdx(eRefPicList); iRefIdxTemp++ )
3016  {
3017  uiBitsTemp = uiMbBits[iRefList];
3018  if ( pcCU->getSlice()->getNumRefIdx(eRefPicList) > 1 )
3019  {
3020  uiBitsTemp += iRefIdxTemp+1;
3021  if ( iRefIdxTemp == pcCU->getSlice()->getNumRefIdx(eRefPicList)-1 )
3022  {
3023  uiBitsTemp--;
3024  }
3025  }
3026  xEstimateMvPredAMVP( pcCU, pcOrgYuv, iPartIdx, eRefPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], false, &biPDistTemp);
3027  aaiMvpIdx[iRefList][iRefIdxTemp] = pcCU->getMVPIdx(eRefPicList, uiPartAddr);
3028  aaiMvpNum[iRefList][iRefIdxTemp] = pcCU->getMVPNum(eRefPicList, uiPartAddr);
3029 
3030  if(pcCU->getSlice()->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist)
3031  {
3032  bestBiPDist = biPDistTemp;
3033  bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
3034  bestBiPRefIdxL1 = iRefIdxTemp;
3035  }
3036 
3037  uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
3038 
3039 #if GPB_SIMPLE_UNI
3040  if ( iRefList == 1 ) // list 1
3041  {
3042  if ( pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp ) >= 0 )
3043  {
3044  cMvTemp[1][iRefIdxTemp] = cMvTemp[0][pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )];
3045  uiCostTemp = uiCostTempL0[pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )];
3046  /*first subtract the bit-rate part of the cost of the other list*/
3047  uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )] );
3048  /*correct the bit-rate part of the current ref*/
3049  m_pcRdCost->setPredictor ( cMvPred[iRefList][iRefIdxTemp] );
3050  uiBitsTemp += m_pcRdCost->getBits( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer() );
3051  /*calculate the correct cost*/
3052  uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
3053  }
3054  else
3055  {
3056  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
3057  }
3058  }
3059  else
3060  {
3061  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
3062  }
3063 #else
3064  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
3065 #endif
3066  xCopyAMVPInfo(pcCU->getCUMvField(eRefPicList)->getAMVPInfo(), &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
3067  xCheckBestMVP(pcCU, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
3068 
3069  if ( iRefList == 0 )
3070  {
3071  uiCostTempL0[iRefIdxTemp] = uiCostTemp;
3072  uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
3073  }
3074  if ( uiCostTemp < uiCost[iRefList] )
3075  {
3076  uiCost[iRefList] = uiCostTemp;
3077  uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
3078 
3079  // set motion
3080  cMv[iRefList] = cMvTemp[iRefList][iRefIdxTemp];
3081  iRefIdx[iRefList] = iRefIdxTemp;
3082  }
3083 
3084  if ( iRefList == 1 && uiCostTemp < costValidList1 && pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp ) < 0 )
3085  {
3086  costValidList1 = uiCostTemp;
3087  bitsValidList1 = uiBitsTemp;
3088 
3089  // set motion
3090  mvValidList1 = cMvTemp[iRefList][iRefIdxTemp];
3091  refIdxValidList1 = iRefIdxTemp;
3092  }
3093  }
3094  }
3095 
3096  // Bi-directional prediction
3097  if ( (pcCU->getSlice()->isInterB()) && (pcCU->isBipredRestriction(iPartIdx) == false) )
3098  {
3099 
3100  cMvBi[0] = cMv[0]; cMvBi[1] = cMv[1];
3101  iRefIdxBi[0] = iRefIdx[0]; iRefIdxBi[1] = iRefIdx[1];
3102 
3103  ::memcpy(cMvPredBi, cMvPred, sizeof(cMvPred));
3104  ::memcpy(aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx));
3105 
3106  UInt uiMotBits[2];
3107 
3108  if(pcCU->getSlice()->getMvdL1ZeroFlag())
3109  {
3110  xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());
3111  pcCU->setMVPIdxSubParts( bestBiPMvpL1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3112  aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
3113  cMvPredBi[1][bestBiPRefIdxL1] = pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_acMvCand[bestBiPMvpL1];
3114 
3115  cMvBi[1] = cMvPredBi[1][bestBiPRefIdxL1];
3116  iRefIdxBi[1] = bestBiPRefIdxL1;
3117  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMv( cMvBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3118  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllRefIdx( iRefIdxBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3119  TComYuv* pcYuvPred = &m_acYuvPred[REF_PIC_LIST_1];
3120  motionCompensation( pcCU, pcYuvPred, REF_PIC_LIST_1, iPartIdx );
3121 
3122  uiMotBits[0] = uiBits[0] - uiMbBits[0];
3123  uiMotBits[1] = uiMbBits[1];
3124 
3125  if ( pcCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1 )
3126  {
3127  uiMotBits[1] += bestBiPRefIdxL1+1;
3128  if ( bestBiPRefIdxL1 == pcCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1)-1 )
3129  {
3130  uiMotBits[1]--;
3131  }
3132  }
3133 
3134  uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
3135 
3136  uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
3137 
3138  cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1];
3139  }
3140  else
3141  {
3142  uiMotBits[0] = uiBits[0] - uiMbBits[0];
3143  uiMotBits[1] = uiBits[1] - uiMbBits[1];
3144  uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
3145  }
3146 
3147  // 4-times iteration (default)
3148  Int iNumIter = 4;
3149 
3150  // fast encoder setting: only one iteration
3151  if ( m_pcEncCfg->getUseFastEnc() || pcCU->getSlice()->getMvdL1ZeroFlag())
3152  {
3153  iNumIter = 1;
3154  }
3155 
3156  for ( Int iIter = 0; iIter < iNumIter; iIter++ )
3157  {
3158  Int iRefList = iIter % 2;
3159 
3160  if ( m_pcEncCfg->getUseFastEnc() )
3161  {
3162  if( uiCost[0] <= uiCost[1] )
3163  {
3164  iRefList = 1;
3165  }
3166  else
3167  {
3168  iRefList = 0;
3169  }
3170  }
3171  else if ( iIter == 0 )
3172  {
3173  iRefList = 0;
3174  }
3175  if ( iIter == 0 && !pcCU->getSlice()->getMvdL1ZeroFlag())
3176  {
3177  pcCU->getCUMvField(RefPicList(1-iRefList))->setAllMv( cMv[1-iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3178  pcCU->getCUMvField(RefPicList(1-iRefList))->setAllRefIdx( iRefIdx[1-iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3179  TComYuv* pcYuvPred = &m_acYuvPred[1-iRefList];
3180  motionCompensation ( pcCU, pcYuvPred, RefPicList(1-iRefList), iPartIdx );
3181  }
3182 
3183  RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
3184 
3185  if(pcCU->getSlice()->getMvdL1ZeroFlag())
3186  {
3187  iRefList = 0;
3188  eRefPicList = REF_PIC_LIST_0;
3189  }
3190 
3191  Bool bChanged = false;
3192 
3193  iRefStart = 0;
3194  iRefEnd = pcCU->getSlice()->getNumRefIdx(eRefPicList)-1;
3195 
3196  for ( Int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ )
3197  {
3198  uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
3199  if ( pcCU->getSlice()->getNumRefIdx(eRefPicList) > 1 )
3200  {
3201  uiBitsTemp += iRefIdxTemp+1;
3202  if ( iRefIdxTemp == pcCU->getSlice()->getNumRefIdx(eRefPicList)-1 )
3203  {
3204  uiBitsTemp--;
3205  }
3206  }
3207  uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
3208  // call ME
3209  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, true );
3210 
3211  xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], pcCU->getCUMvField(eRefPicList)->getAMVPInfo());
3212  xCheckBestMVP(pcCU, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
3213 
3214  if ( uiCostTemp < uiCostBi )
3215  {
3216  bChanged = true;
3217 
3218  cMvBi[iRefList] = cMvTemp[iRefList][iRefIdxTemp];
3219  iRefIdxBi[iRefList] = iRefIdxTemp;
3220 
3221  uiCostBi = uiCostTemp;
3222  uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
3223  uiBits[2] = uiBitsTemp;
3224 
3225  if(iNumIter!=1)
3226  {
3227  // Set motion
3228  pcCU->getCUMvField( eRefPicList )->setAllMv( cMvBi[iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3229  pcCU->getCUMvField( eRefPicList )->setAllRefIdx( iRefIdxBi[iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3230 
3231  TComYuv* pcYuvPred = &m_acYuvPred[iRefList];
3232  motionCompensation( pcCU, pcYuvPred, eRefPicList, iPartIdx );
3233  }
3234  }
3235  } // for loop-iRefIdxTemp
3236 
3237  if ( !bChanged )
3238  {
3239  if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] )
3240  {
3241  xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], pcCU->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo());
3242  xCheckBestMVP(pcCU, REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi);
3243  if(!pcCU->getSlice()->getMvdL1ZeroFlag())
3244  {
3245  xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());
3246  xCheckBestMVP(pcCU, REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi);
3247  }
3248  }
3249  break;
3250  }
3251  } // for loop-iter
3252  } // if (B_SLICE)
3253 
3254 #if AMP_MRG
3255  } //end if bTestNormalMC
3256 #endif
3257  // Clear Motion Field
3258  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( TComMvField(), ePartSize, uiPartAddr, 0, iPartIdx );
3259  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( TComMvField(), ePartSize, uiPartAddr, 0, iPartIdx );
3260  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3261  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3262 
3263  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3264  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3265  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3266  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3267 
3268  UInt uiMEBits = 0;
3269  // Set Motion Field_
3270  cMv[1] = mvValidList1;
3271  iRefIdx[1] = refIdxValidList1;
3272  uiBits[1] = bitsValidList1;
3273  uiCost[1] = costValidList1;
3274 
3275 #if AMP_MRG
3276  if (bTestNormalMC)
3277  {
3278 #endif
3279  if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
3280  {
3281  uiLastMode = 2;
3282  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMv( cMvBi[0], ePartSize, uiPartAddr, 0, iPartIdx );
3283  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllRefIdx( iRefIdxBi[0], ePartSize, uiPartAddr, 0, iPartIdx );
3284  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMv( cMvBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3285  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx( iRefIdxBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3286 
3287  TempMv = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
3288  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3289 
3290  TempMv = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
3291  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3292 
3293  pcCU->setInterDirSubParts( 3, uiPartAddr, iPartIdx, pcCU->getDepth(0) );
3294 
3295  pcCU->setMVPIdxSubParts( aaiMvpIdxBi[0][iRefIdxBi[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3296  pcCU->setMVPNumSubParts( aaiMvpNum[0][iRefIdxBi[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3297  pcCU->setMVPIdxSubParts( aaiMvpIdxBi[1][iRefIdxBi[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3298  pcCU->setMVPNumSubParts( aaiMvpNum[1][iRefIdxBi[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3299 
3300  uiMEBits = uiBits[2];
3301  }
3302  else if ( uiCost[0] <= uiCost[1] )
3303  {
3304  uiLastMode = 0;
3305  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMv( cMv[0], ePartSize, uiPartAddr, 0, iPartIdx );
3306  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllRefIdx( iRefIdx[0], ePartSize, uiPartAddr, 0, iPartIdx );
3307 
3308  TempMv = cMv[0] - cMvPred[0][iRefIdx[0]];
3309  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3310 
3311  pcCU->setInterDirSubParts( 1, uiPartAddr, iPartIdx, pcCU->getDepth(0) );
3312 
3313  pcCU->setMVPIdxSubParts( aaiMvpIdx[0][iRefIdx[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3314  pcCU->setMVPNumSubParts( aaiMvpNum[0][iRefIdx[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3315 
3316  uiMEBits = uiBits[0];
3317  }
3318  else
3319  {
3320  uiLastMode = 1;
3321  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMv( cMv[1], ePartSize, uiPartAddr, 0, iPartIdx );
3322  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx( iRefIdx[1], ePartSize, uiPartAddr, 0, iPartIdx );
3323 
3324  TempMv = cMv[1] - cMvPred[1][iRefIdx[1]];
3325  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3326 
3327  pcCU->setInterDirSubParts( 2, uiPartAddr, iPartIdx, pcCU->getDepth(0) );
3328 
3329  pcCU->setMVPIdxSubParts( aaiMvpIdx[1][iRefIdx[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3330  pcCU->setMVPNumSubParts( aaiMvpNum[1][iRefIdx[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3331 
3332  uiMEBits = uiBits[1];
3333  }
3334 #if AMP_MRG
3335  } // end if bTestNormalMC
3336 #endif
3337 
3338  if ( pcCU->getPartitionSize( uiPartAddr ) != SIZE_2Nx2N )
3339  {
3340  UInt uiMRGInterDir = 0;
3341  TComMvField cMRGMvField[2];
3342  UInt uiMRGIndex = 0;
3343 
3344  UInt uiMEInterDir = 0;
3345  TComMvField cMEMvField[2];
3346 
3347  m_pcRdCost->getMotionCost( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) );
3348 
3349 #if AMP_MRG
3350  // calculate ME cost
3351  Distortion uiMEError = std::numeric_limits<Distortion>::max();
3352  Distortion uiMECost = std::numeric_limits<Distortion>::max();
3353 
3354  if (bTestNormalMC)
3355  {
3356  xGetInterPredictionError( pcCU, pcOrgYuv, iPartIdx, uiMEError, m_pcEncCfg->getUseHADME() );
3357  uiMECost = uiMEError + m_pcRdCost->getCost( uiMEBits );
3358  }
3359 #else
3360  // calculate ME cost
3361  Distortion uiMEError = std::numeric_limits<Distortion>::max();
3362  xGetInterPredictionError( pcCU, pcOrgYuv, iPartIdx, uiMEError, m_pcEncCfg->getUseHADME() );
3363  Distortion uiMECost = uiMEError + m_pcRdCost->getCost( uiMEBits );
3364 #endif
3365  // save ME result.
3366  uiMEInterDir = pcCU->getInterDir( uiPartAddr );
3367  pcCU->getMvField( pcCU, uiPartAddr, REF_PIC_LIST_0, cMEMvField[0] );
3368  pcCU->getMvField( pcCU, uiPartAddr, REF_PIC_LIST_1, cMEMvField[1] );
3369 
3370  // find Merge result
3371  Distortion uiMRGCost = std::numeric_limits<Distortion>::max();
3372 
3373  xMergeEstimation( pcCU, pcOrgYuv, iPartIdx, uiMRGInterDir, cMRGMvField, uiMRGIndex, uiMRGCost, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand);
3374 
3375  if ( uiMRGCost < uiMECost )
3376  {
3377  // set Merge result
3378  pcCU->setMergeFlagSubParts ( true, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3379  pcCU->setMergeIndexSubParts( uiMRGIndex, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3380  pcCU->setInterDirSubParts ( uiMRGInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3381  pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMRGMvField[0], ePartSize, uiPartAddr, 0, iPartIdx );
3382  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMRGMvField[1], ePartSize, uiPartAddr, 0, iPartIdx );
3383 
3384  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3385  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3386 
3387  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3388  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3389  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3390  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3391  }
3392  else
3393  {
3394  // set ME result
3395  pcCU->setMergeFlagSubParts( false, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3396  pcCU->setInterDirSubParts ( uiMEInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3397  pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMEMvField[0], ePartSize, uiPartAddr, 0, iPartIdx );
3398  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMEMvField[1], ePartSize, uiPartAddr, 0, iPartIdx );
3399  }
3400  }
3401 
3402  // MC
3403  motionCompensation ( pcCU, pcPredYuv, REF_PIC_LIST_X, iPartIdx );
3404 
3405  } // end of for ( Int iPartIdx = 0; iPartIdx < iNumPart; iPartIdx++ )
3406 
3408 
3409  return;
3410 }
3411 
3412 
3413 // AMVP
3414 Void TEncSearch::xEstimateMvPredAMVP( TComDataCU* pcCU, TComYuv* pcOrgYuv, UInt uiPartIdx, RefPicList eRefPicList, Int iRefIdx, TComMv& rcMvPred, Bool bFilled, Distortion* puiDistBiP )
3415 {
3416  AMVPInfo* pcAMVPInfo = pcCU->getCUMvField(eRefPicList)->getAMVPInfo();
3417 
3418  TComMv cBestMv;
3419  Int iBestIdx = 0;
3420  TComMv cZeroMv;
3421  TComMv cMvPred;
3422  Distortion uiBestCost = std::numeric_limits<Distortion>::max();
3423  UInt uiPartAddr = 0;
3424  Int iRoiWidth, iRoiHeight;
3425  Int i;
3426 
3427  pcCU->getPartIndexAndSize( uiPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
3428  // Fill the MV Candidates
3429  if (!bFilled)
3430  {
3431  pcCU->fillMvpCand( uiPartIdx, uiPartAddr, eRefPicList, iRefIdx, pcAMVPInfo );
3432  }
3433 
3434  // initialize Mvp index & Mvp
3435  iBestIdx = 0;
3436  cBestMv = pcAMVPInfo->m_acMvCand[0];
3437  if (pcAMVPInfo->iN <= 1)
3438  {
3439  rcMvPred = cBestMv;
3440 
3441  pcCU->setMVPIdxSubParts( iBestIdx, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3442  pcCU->setMVPNumSubParts( pcAMVPInfo->iN, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3443 
3444  if(pcCU->getSlice()->getMvdL1ZeroFlag() && eRefPicList==REF_PIC_LIST_1)
3445  {
3446  (*puiDistBiP) = xGetTemplateCost( pcCU, uiPartIdx, uiPartAddr, pcOrgYuv, &m_cYuvPredTemp, rcMvPred, 0, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, iRoiWidth, iRoiHeight);
3447  }
3448  return;
3449  }
3450 
3451  if (bFilled)
3452  {
3453  assert(pcCU->getMVPIdx(eRefPicList,uiPartAddr) >= 0);
3454  rcMvPred = pcAMVPInfo->m_acMvCand[pcCU->getMVPIdx(eRefPicList,uiPartAddr)];
3455  return;
3456  }
3457 
3459  //-- Check Minimum Cost.
3460  for ( i = 0 ; i < pcAMVPInfo->iN; i++)
3461  {
3462  Distortion uiTmpCost;
3463  uiTmpCost = xGetTemplateCost( pcCU, uiPartIdx, uiPartAddr, pcOrgYuv, &m_cYuvPredTemp, pcAMVPInfo->m_acMvCand[i], i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, iRoiWidth, iRoiHeight);
3464  if ( uiBestCost > uiTmpCost )
3465  {
3466  uiBestCost = uiTmpCost;
3467  cBestMv = pcAMVPInfo->m_acMvCand[i];
3468  iBestIdx = i;
3469  (*puiDistBiP) = uiTmpCost;
3470  }
3471  }
3472 
3474 
3475  // Setting Best MVP
3476  rcMvPred = cBestMv;
3477  pcCU->setMVPIdxSubParts( iBestIdx, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3478  pcCU->setMVPNumSubParts( pcAMVPInfo->iN, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3479  return;
3480 }
3481 
3483 {
3484  assert(iIdx >= 0 && iNum >= 0 && iIdx < iNum);
3485 
3486  if (iNum == 1)
3487  {
3488  return 0;
3489  }
3490 
3491  UInt uiLength = 1;
3492  Int iTemp = iIdx;
3493  if ( iTemp == 0 )
3494  {
3495  return uiLength;
3496  }
3497 
3498  Bool bCodeLast = ( iNum-1 > iTemp );
3499 
3500  uiLength += (iTemp-1);
3501 
3502  if( bCodeLast )
3503  {
3504  uiLength++;
3505  }
3506 
3507  return uiLength;
3508 }
3509 
3510 Void TEncSearch::xGetBlkBits( PartSize eCUMode, Bool bPSlice, Int iPartIdx, UInt uiLastMode, UInt uiBlkBit[3])
3511 {
3512  if ( eCUMode == SIZE_2Nx2N )
3513  {
3514  uiBlkBit[0] = (! bPSlice) ? 3 : 1;
3515  uiBlkBit[1] = 3;
3516  uiBlkBit[2] = 5;
3517  }
3518  else if ( (eCUMode == SIZE_2NxN || eCUMode == SIZE_2NxnU) || eCUMode == SIZE_2NxnD )
3519  {
3520  UInt aauiMbBits[2][3][3] = { { {0,0,3}, {0,0,0}, {0,0,0} } , { {5,7,7}, {7,5,7}, {9-3,9-3,9-3} } };
3521  if ( bPSlice )
3522  {
3523  uiBlkBit[0] = 3;
3524  uiBlkBit[1] = 0;
3525  uiBlkBit[2] = 0;
3526  }
3527  else
3528  {
3529  ::memcpy( uiBlkBit, aauiMbBits[iPartIdx][uiLastMode], 3*sizeof(UInt) );
3530  }
3531  }
3532  else if ( (eCUMode == SIZE_Nx2N || eCUMode == SIZE_nLx2N) || eCUMode == SIZE_nRx2N )
3533  {
3534  UInt aauiMbBits[2][3][3] = { { {0,2,3}, {0,0,0}, {0,0,0} } , { {5,7,7}, {7-2,7-2,9-2}, {9-3,9-3,9-3} } };
3535  if ( bPSlice )
3536  {
3537  uiBlkBit[0] = 3;
3538  uiBlkBit[1] = 0;
3539  uiBlkBit[2] = 0;
3540  }
3541  else
3542  {
3543  ::memcpy( uiBlkBit, aauiMbBits[iPartIdx][uiLastMode], 3*sizeof(UInt) );
3544  }
3545  }
3546  else if ( eCUMode == SIZE_NxN )
3547  {
3548  uiBlkBit[0] = (! bPSlice) ? 3 : 1;
3549  uiBlkBit[1] = 3;
3550  uiBlkBit[2] = 5;
3551  }
3552  else
3553  {
3554  printf("Wrong!\n");
3555  assert( 0 );
3556  }
3557 }
3558 
3560 {
3561  pDst->iN = pSrc->iN;
3562  for (Int i = 0; i < pSrc->iN; i++)
3563  {
3564  pDst->m_acMvCand[i] = pSrc->m_acMvCand[i];
3565  }
3566 }
3567 
3568 Void TEncSearch::xCheckBestMVP ( TComDataCU* pcCU, RefPicList eRefPicList, TComMv cMv, TComMv& rcMvPred, Int& riMVPIdx, UInt& ruiBits, Distortion& ruiCost )
3569 {
3570  AMVPInfo* pcAMVPInfo = pcCU->getCUMvField(eRefPicList)->getAMVPInfo();
3571 
3572  assert(pcAMVPInfo->m_acMvCand[riMVPIdx] == rcMvPred);
3573 
3574  if (pcAMVPInfo->iN < 2)
3575  {
3576  return;
3577  }
3578 
3579  m_pcRdCost->getMotionCost( true, 0, pcCU->getCUTransquantBypass(0) );
3580  m_pcRdCost->setCostScale ( 0 );
3581 
3582  Int iBestMVPIdx = riMVPIdx;
3583 
3584  m_pcRdCost->setPredictor( rcMvPred );
3585  Int iOrgMvBits = m_pcRdCost->getBits(cMv.getHor(), cMv.getVer());
3586  iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
3587  Int iBestMvBits = iOrgMvBits;
3588 
3589  for (Int iMVPIdx = 0; iMVPIdx < pcAMVPInfo->iN; iMVPIdx++)
3590  {
3591  if (iMVPIdx == riMVPIdx)
3592  {
3593  continue;
3594  }
3595 
3596  m_pcRdCost->setPredictor( pcAMVPInfo->m_acMvCand[iMVPIdx] );
3597 
3598  Int iMvBits = m_pcRdCost->getBits(cMv.getHor(), cMv.getVer());
3599  iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
3600 
3601  if (iMvBits < iBestMvBits)
3602  {
3603  iBestMvBits = iMvBits;
3604  iBestMVPIdx = iMVPIdx;
3605  }
3606  }
3607 
3608  if (iBestMVPIdx != riMVPIdx) //if changed
3609  {
3610  rcMvPred = pcAMVPInfo->m_acMvCand[iBestMVPIdx];
3611 
3612  riMVPIdx = iBestMVPIdx;
3613  UInt uiOrgBits = ruiBits;
3614  ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits;
3615  ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits )) + m_pcRdCost->getCost( ruiBits );
3616  }
3617 }
3618 
3619 
3621  UInt uiPartIdx,
3622  UInt uiPartAddr,
3623  TComYuv* pcOrgYuv,
3624  TComYuv* pcTemplateCand,
3625  TComMv cMvCand,
3626  Int iMVPIdx,
3627  Int iMVPNum,
3628  RefPicList eRefPicList,
3629  Int iRefIdx,
3630  Int iSizeX,
3631  Int iSizeY
3632  )
3633 {
3634  Distortion uiCost = std::numeric_limits<Distortion>::max();
3635 
3636  TComPicYuv* pcPicYuvRef = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdx )->getPicYuvRec();
3637 
3638  pcCU->clipMv( cMvCand );
3639 
3640  // prediction pattern
3641  if ( pcCU->getSlice()->testWeightPred() && pcCU->getSlice()->getSliceType()==P_SLICE )
3642  {
3643  xPredInterBlk( COMPONENT_Y, pcCU, pcPicYuvRef, uiPartAddr, &cMvCand, iSizeX, iSizeY, pcTemplateCand, true );
3644  }
3645  else
3646  {
3647  xPredInterBlk( COMPONENT_Y, pcCU, pcPicYuvRef, uiPartAddr, &cMvCand, iSizeX, iSizeY, pcTemplateCand, false );
3648  }
3649 
3650  if ( pcCU->getSlice()->testWeightPred() && pcCU->getSlice()->getSliceType()==P_SLICE )
3651  {
3652  xWeightedPredictionUni( pcCU, pcTemplateCand, uiPartAddr, iSizeX, iSizeY, eRefPicList, pcTemplateCand, iRefIdx );
3653  }
3654 
3655  // calc distortion
3656 
3657  uiCost = m_pcRdCost->getDistPart( g_bitDepth[CHANNEL_TYPE_LUMA], pcTemplateCand->getAddr(COMPONENT_Y, uiPartAddr), pcTemplateCand->getStride(COMPONENT_Y), pcOrgYuv->getAddr(COMPONENT_Y, uiPartAddr), pcOrgYuv->getStride(COMPONENT_Y), iSizeX, iSizeY, COMPONENT_Y, DF_SAD );
3658  uiCost = (UInt) m_pcRdCost->calcRdCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum], uiCost, false, DF_SAD );
3659  return uiCost;
3660 }
3661 
3662 
3663 
3664 
3665 Void TEncSearch::xMotionEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, RefPicList eRefPicList, TComMv* pcMvPred, Int iRefIdxPred, TComMv& rcMv, UInt& ruiBits, Distortion& ruiCost, Bool bBi )
3666 {
3667  UInt uiPartAddr;
3668  Int iRoiWidth;
3669  Int iRoiHeight;
3670 
3671  TComMv cMvHalf, cMvQter;
3672  TComMv cMvSrchRngLT;
3673  TComMv cMvSrchRngRB;
3674 
3675  TComYuv* pcYuv = pcYuvOrg;
3676 
3677  assert(eRefPicList < MAX_NUM_REF_LIST_ADAPT_SR && iRefIdxPred<Int(MAX_IDX_ADAPT_SR));
3678  m_iSearchRange = m_aaiAdaptSR[eRefPicList][iRefIdxPred];
3679 
3680  Int iSrchRng = ( bBi ? m_bipredSearchRange : m_iSearchRange );
3681  TComPattern tmpPattern;
3682  TComPattern* pcPatternKey = &tmpPattern;
3683 
3684  Double fWeight = 1.0;
3685 
3686  pcCU->getPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
3687 
3688  if ( bBi )
3689  {
3690  TComYuv* pcYuvOther = &m_acYuvPred[1-(Int)eRefPicList];
3691  pcYuv = &m_cYuvPredTemp;
3692 
3693  pcYuvOrg->copyPartToPartYuv( pcYuv, uiPartAddr, iRoiWidth, iRoiHeight );
3694 
3695  pcYuv->removeHighFreq( pcYuvOther, uiPartAddr, iRoiWidth, iRoiHeight );
3696 
3697  fWeight = 0.5;
3698  }
3699 
3700  // Search key pattern initialization
3701  pcPatternKey->initPattern( pcYuv->getAddr ( COMPONENT_Y, uiPartAddr ),
3702  iRoiWidth,
3703  iRoiHeight,
3704  pcYuv->getStride(COMPONENT_Y) );
3705 
3706  Pel* piRefY = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu() + uiPartAddr );
3707  Int iRefStride = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPicYuvRec()->getStride(COMPONENT_Y);
3708 
3709  TComMv cMvPred = *pcMvPred;
3710 
3711  if ( bBi )
3712  {
3713  xSetSearchRange ( pcCU, rcMv , iSrchRng, cMvSrchRngLT, cMvSrchRngRB );
3714  }
3715  else
3716  {
3717  xSetSearchRange ( pcCU, cMvPred, iSrchRng, cMvSrchRngLT, cMvSrchRngRB );
3718  }
3719 
3720  m_pcRdCost->getMotionCost( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) );
3721 
3722  m_pcRdCost->setPredictor ( *pcMvPred );
3723  m_pcRdCost->setCostScale ( 2 );
3724 
3725  setWpScalingDistParam( pcCU, iRefIdxPred, eRefPicList );
3726  // Do integer search
3727  if ( !m_iFastSearch || bBi )
3728  {
3729  xPatternSearch ( pcPatternKey, piRefY, iRefStride, &cMvSrchRngLT, &cMvSrchRngRB, rcMv, ruiCost );
3730  }
3731  else
3732  {
3733  rcMv = *pcMvPred;
3734  const TComMv *pIntegerMv2Nx2NPred=0;
3735  if (pcCU->getPartitionSize(0) != SIZE_2Nx2N || pcCU->getDepth(0) != 0)
3736  {
3737  pIntegerMv2Nx2NPred = &(m_integerMv2Nx2N[eRefPicList][iRefIdxPred]);
3738  }
3739  xPatternSearchFast ( pcCU, pcPatternKey, piRefY, iRefStride, &cMvSrchRngLT, &cMvSrchRngRB, rcMv, ruiCost, pIntegerMv2Nx2NPred );
3740  if (pcCU->getPartitionSize(0) == SIZE_2Nx2N)
3741  {
3742  m_integerMv2Nx2N[eRefPicList][iRefIdxPred] = rcMv;
3743  }
3744  }
3745 
3746  m_pcRdCost->getMotionCost( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) );
3747  m_pcRdCost->setCostScale ( 1 );
3748 
3749  const Bool bIsLosslessCoded = pcCU->getCUTransquantBypass(uiPartAddr) != 0;
3750  xPatternSearchFracDIF( bIsLosslessCoded, pcPatternKey, piRefY, iRefStride, &rcMv, cMvHalf, cMvQter, ruiCost ,bBi );
3751 
3752  m_pcRdCost->setCostScale( 0 );
3753  rcMv <<= 2;
3754  rcMv += (cMvHalf <<= 1);
3755  rcMv += cMvQter;
3756 
3757  UInt uiMvBits = m_pcRdCost->getBits( rcMv.getHor(), rcMv.getVer() );
3758 
3759  ruiBits += uiMvBits;
3760  ruiCost = (Distortion)( floor( fWeight * ( (Double)ruiCost - (Double)m_pcRdCost->getCost( uiMvBits ) ) ) + (Double)m_pcRdCost->getCost( ruiBits ) );
3761 }
3762 
3763 
3764 
3765 
// Derives the search window around a motion-vector predictor.
//
//   pcCU           CU whose clipMv() is used to clip the vectors.
//   cMvPred        predictor; it is copied first, so the caller's vector is left unchanged.
//   iSrchRng       search range; it is scaled by << iMvShift before being applied.
//   rcMvSrchRngLT  [out] left/top corner of the window.
//   rcMvSrchRngRB  [out] right/bottom corner of the window.
//
// Both corners are clipped with clipMv() and then shifted right by iMvShift.
// NOTE(review): the shift of 2 presumably converts between quarter-sample and
// integer-sample units - confirm against TComMv / TComDataCU::clipMv.
3766 Void TEncSearch::xSetSearchRange ( TComDataCU* pcCU, TComMv& cMvPred, Int iSrchRng, TComMv& rcMvSrchRngLT, TComMv& rcMvSrchRngRB )
3767 {
3768  Int iMvShift = 2;
3769  TComMv cTmpMvPred = cMvPred; // work on a copy so that clipping does not alter cMvPred
3770  pcCU->clipMv( cTmpMvPred );
3771 
 // corners = clipped predictor -/+ the scaled range
3772  rcMvSrchRngLT.setHor( cTmpMvPred.getHor() - (iSrchRng << iMvShift) );
3773  rcMvSrchRngLT.setVer( cTmpMvPred.getVer() - (iSrchRng << iMvShift) );
3774 
3775  rcMvSrchRngRB.setHor( cTmpMvPred.getHor() + (iSrchRng << iMvShift) );
3776  rcMvSrchRngRB.setVer( cTmpMvPred.getVer() + (iSrchRng << iMvShift) );
 // clip the corners themselves as well, then drop the iMvShift scaling again
3777  pcCU->clipMv ( rcMvSrchRngLT );
3778  pcCU->clipMv ( rcMvSrchRngRB );
3779 
3780  rcMvSrchRngLT >>= iMvShift;
3781  rcMvSrchRngRB >>= iMvShift;
3782 }
3783 
3784 
3785 
3786 
// Exhaustive integer search over the window [*pcMvSrchRngLT, *pcMvSrchRngRB] (bounds inclusive).
// Every (x, y) in the window is scored as m_cDistParam.DistFunc() evaluated at
// piRefY + y * iRefStride + x, plus m_pcRdCost->getCost( x, y ). Positions are visited row by
// row and only a strictly smaller score replaces the current best, so the first minimum wins.
//
//   pcPatternKey   pattern handed to m_pcRdCost->setDistParam().
//   piRefY         reference samples; all offsets are applied relative to this pointer.
//   iRefStride     distance between consecutive rows of piRefY.
//   pcMvSrchRngLT  left/top corner of the window.
//   pcMvSrchRngRB  right/bottom corner of the window.
//   rcMv           [out] best (x, y) found.
//   ruiSAD         [out] best score with the getCost() term of the best position subtracted again.
//
// If the window is empty no position is tested: rcMv becomes (0, 0) and ruiSAD is derived from
// the initial std::numeric_limits<Distortion>::max() value.
3787 Void TEncSearch::xPatternSearch( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, TComMv* pcMvSrchRngLT, TComMv* pcMvSrchRngRB, TComMv& rcMv, Distortion& ruiSAD )
3788 {
3789  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
3790  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
3791  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
3792  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
3793 
3794  Distortion uiSad;
3795  Distortion uiSadBest = std::numeric_limits<Distortion>::max();
3796  Int iBestX = 0;
3797  Int iBestY = 0;
3798 
3799  Pel* piRefSrch;
3800 
3801  //-- jclee for using the SAD function pointer
 // m_cDistParam.DistFunc is the function invoked for every candidate in the loop below
3802  m_pcRdCost->setDistParam( pcPatternKey, piRefY, iRefStride, m_cDistParam );
3803 
3804  // fast encoder decision: use subsampled SAD for integer ME
3805  if ( m_pcEncCfg->getUseFastEnc() )
3806  {
3807  if ( m_cDistParam.iRows > 8 )
3808  {
3809  m_cDistParam.iSubShift = 1;
3810  }
3811  }
3812 
 // move to the first row of the window; piRefY then advances one row per y iteration
3813  piRefY += (iSrchRngVerTop * iRefStride);
3814  for ( Int y = iSrchRngVerTop; y <= iSrchRngVerBottom; y++ )
3815  {
3816  for ( Int x = iSrchRngHorLeft; x <= iSrchRngHorRight; x++ )
3817  {
3818  // find min. distortion position
3819  piRefSrch = piRefY + x;
3820  m_cDistParam.pCur = piRefSrch;
3821 
 // NOTE(review): original lines 3822 and 3824 are absent from this listing (the numbering
 // skips them); check the real source for the statements that belong here.
3823 
3825  uiSad = m_cDistParam.DistFunc( &m_cDistParam );
3826 
3827  // motion cost
3828  uiSad += m_pcRdCost->getCost( x, y );
3829 
3830  if ( uiSad < uiSadBest )
3831  {
3832  uiSadBest = uiSad;
3833  iBestX = x;
3834  iBestY = y;
3835  }
3836  }
3837  piRefY += iRefStride;
3838  }
3839 
3840  rcMv.set( iBestX, iBestY );
3841 
 // report the score without the motion-cost term that was added inside the loop
3842  ruiSAD = uiSadBest - m_pcRdCost->getCost( iBestX, iBestY );
3843  return;
3844 }
3845 
3846 
3847 
// NOTE(review): original line 3848 is absent from this listing. Judging by the call
// "xPatternSearchFast ( pcCU, pcPatternKey, ... )" in the integer-search code earlier in this
// file, it presumably reads "Void TEncSearch::xPatternSearchFast( TComDataCU* pcCU," - confirm.
//
// Dispatches the fast integer search selected by m_iFastSearch:
//   1     -> xTZSearch
//   2     -> xTZSearchSelective
//   other -> nothing is searched; rcMv and ruiSAD are left as passed in.
// All arguments are forwarded unchanged to the selected routine.
3849  TComPattern* pcPatternKey,
3850  Pel* piRefY,
3851  Int iRefStride,
3852  TComMv* pcMvSrchRngLT,
3853  TComMv* pcMvSrchRngRB,
3854  TComMv &rcMv,
3855  Distortion &ruiSAD,
3856  const TComMv* pIntegerMv2Nx2NPred )
3857 {
 // NOTE(review): original lines 3859, 3861 and 3863 are not shown in this listing. They
 // presumably fill m_acMvPredictors[MD_LEFT / MD_ABOVE / MD_ABOVE_RIGHT], which the TZ searches
 // read - confirm. The asserts only check that these indices are below NUM_MV_PREDICTORS.
3858  assert (MD_LEFT < NUM_MV_PREDICTORS);
3860  assert (MD_ABOVE < NUM_MV_PREDICTORS);
3862  assert (MD_ABOVE_RIGHT < NUM_MV_PREDICTORS);
3864 
3865  switch ( m_iFastSearch )
3866  {
3867  case 1:
3868  xTZSearch( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred );
3869  break;
3870 
3871  case 2:
3872  xTZSearchSelective( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred );
3873  break;
3874  default:
3875  break;
3876  }
3877 }
3878 
3879 
3880 
3881 
// NOTE(review): original line 3882 (function name and first parameter) and line 3897 are absent
// from this listing. The fast-search dispatcher calls xTZSearch( pcCU, pcPatternKey, ... ) with
// exactly this parameter list, so this is presumably
// "Void TEncSearch::xTZSearch( TComDataCU* pcCU," - confirm. The switches used below
// (bTestOtherPredictedMV, bTestZeroVector, bFirstSearchDiamond, bFirstSearchStop,
// uiFirstSearchRounds, bTestZeroVectorStart, bTestZeroVectorStop, bEnableRasterSearch,
// bAlwaysRasterSearch, iRaster, bRasterRefinementEnable, bRasterRefinementDiamond,
// bStarRefinementEnable, bStarRefinementDiamond, bStarRefinementStop, uiStarRefinementRounds)
// have no visible declaration; presumably line 3897 introduces them - confirm.
//
// Fast integer motion search. Candidate start points are scored with xTZSearchHelp(), then the
// best one is refined with expanding diamond/square patterns, an optional raster scan and
// optional raster/star refinement passes.
//
//   pcPatternKey         pattern passed on to every search helper.
//   piRefY, iRefStride   reference samples and their stride, stored in cStruct.
//   pcMvSrchRngLT/RB     search window; passed on to the diamond/square/2-point helpers.
//   rcMv                 [in/out] start vector on entry (clipped, then >> 2);
//                        best position (cStruct.iBestX, cStruct.iBestY) on exit.
//   ruiSAD               [out] cStruct.uiBestSad minus getCost() of the best position.
//   pIntegerMv2Nx2NPred  optional extra start candidate; ignored when 0.
3883  TComPattern* pcPatternKey,
3884  Pel* piRefY,
3885  Int iRefStride,
3886  TComMv* pcMvSrchRngLT,
3887  TComMv* pcMvSrchRngRB,
3888  TComMv &rcMv,
3889  Distortion &ruiSAD,
3890  const TComMv* pIntegerMv2Nx2NPred )
3891 {
3892  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
3893  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
3894  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
3895  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
3896 
3898 
3899  UInt uiSearchRange = m_iSearchRange;
3900  pcCU->clipMv( rcMv );
3901  rcMv >>= 2;
3902  // init TZSearchStruct
 // NOTE(review): iBestX / iBestY are not initialised here (the selective variant sets them to
 // 0); they are presumably written by the first xTZSearchHelp() call - confirm.
3903  IntTZSearchStruct cStruct;
3904  cStruct.iYStride = iRefStride;
3905  cStruct.piRefY = piRefY;
3906  cStruct.uiBestSad = MAX_UINT;
3907 
3908  // set rcMv (Median predictor) as start point and as best point
3909  xTZSearchHelp( pcPatternKey, cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 );
3910 
3911  // test whether one of PRED_A, PRED_B, PRED_C MV is better start point than Median predictor
3912  if ( bTestOtherPredictedMV )
3913  {
3914  for ( UInt index = 0; index < NUM_MV_PREDICTORS; index++ )
3915  {
3916  TComMv cMv = m_acMvPredictors[index];
3917  pcCU->clipMv( cMv );
3918  cMv >>= 2;
3919  xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 );
3920  }
3921  }
3922 
3923  // test whether zero Mv is better start point than Median predictor
3924  if ( bTestZeroVector )
3925  {
3926  xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
3927  }
3928 
 // optional extra candidate supplied by the caller: scale up, clip, scale back down, then test
3929  if (pIntegerMv2Nx2NPred != 0)
3930  {
3931  TComMv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
3932  integerMv2Nx2NPred <<= 2;
3933  pcCU->clipMv( integerMv2Nx2NPred );
3934  integerMv2Nx2NPred >>= 2;
3935  xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
3936 
3937  // reset search range
 // The window is recomputed around the current best position. Only the local iSrchRng*
 // bounds (used by the raster search below) are updated; the diamond/square/2-point helpers
 // still receive the caller's pcMvSrchRngLT / pcMvSrchRngRB.
3938  TComMv cMvSrchRngLT;
3939  TComMv cMvSrchRngRB;
3940  Int iSrchRng = m_iSearchRange;
3941  TComMv currBestMv(cStruct.iBestX, cStruct.iBestY );
3942  currBestMv <<= 2;
3943  xSetSearchRange( pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB );
3944  iSrchRngHorLeft = cMvSrchRngLT.getHor();
3945  iSrchRngHorRight = cMvSrchRngRB.getHor();
3946  iSrchRngVerTop = cMvSrchRngLT.getVer();
3947  iSrchRngVerBottom = cMvSrchRngRB.getVer();
3948  }
3949 
3950  // start search
3951  Int iDist = 0;
3952  Int iStartX = cStruct.iBestX;
3953  Int iStartY = cStruct.iBestY;
3954 
3955  // first search
 // the pattern distance doubles each round (1, 2, 4, ...) up to uiSearchRange, always
 // centred on the start point chosen above
3956  for ( iDist = 1; iDist <= (Int)uiSearchRange; iDist*=2 )
3957  {
3958  if ( bFirstSearchDiamond == 1 )
3959  {
3960  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
3961  }
3962  else
3963  {
3964  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
3965  }
3966 
3967  if ( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion
3968  {
3969  break;
3970  }
3971  }
3972 
3973  // test whether zero Mv is a better start point than Median predictor
3974  if ( bTestZeroVectorStart && ((cStruct.iBestX != 0) || (cStruct.iBestY != 0)) )
3975  {
3976  xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
3977  if ( (cStruct.iBestX == 0) && (cStruct.iBestY == 0) )
3978  {
3979  // test its neighborhood
3980  for ( iDist = 1; iDist <= (Int)uiSearchRange; iDist*=2 )
3981  {
3982  xTZ8PointDiamondSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, 0, 0, iDist );
3983  if ( bTestZeroVectorStop && (cStruct.uiBestRound > 0) ) // stop criterion
3984  {
3985  break;
3986  }
3987  }
3988  }
3989  }
3990 
3991  // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
3992  if ( cStruct.uiBestDistance == 1 )
3993  {
3994  cStruct.uiBestDistance = 0;
3995  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
3996  }
3997 
3998  // raster search if distance is too big
 // scans the (possibly recomputed) window on a grid with step iRaster in both directions
3999  if ( bEnableRasterSearch && ( ((Int)(cStruct.uiBestDistance) > iRaster) || bAlwaysRasterSearch ) )
4000  {
4001  cStruct.uiBestDistance = iRaster;
4002  for ( iStartY = iSrchRngVerTop; iStartY <= iSrchRngVerBottom; iStartY += iRaster )
4003  {
4004  for ( iStartX = iSrchRngHorLeft; iStartX <= iSrchRngHorRight; iStartX += iRaster )
4005  {
4006  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, iRaster );
4007  }
4008  }
4009  }
4010 
4011  // raster refinement
4012  if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 )
4013  {
4014  while ( cStruct.uiBestDistance > 0 )
4015  {
4016  iStartX = cStruct.iBestX;
4017  iStartY = cStruct.iBestY;
4018  if ( cStruct.uiBestDistance > 1 )
4019  {
 // halves cStruct.uiBestDistance in place and searches at the halved distance
4020  iDist = cStruct.uiBestDistance >>= 1;
4021  if ( bRasterRefinementDiamond == 1 )
4022  {
4023  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4024  }
4025  else
4026  {
4027  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4028  }
4029  }
4030 
4031  // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4032  if ( cStruct.uiBestDistance == 1 )
4033  {
4034  cStruct.uiBestDistance = 0;
4035  if ( cStruct.ucPointNr != 0 )
4036  {
4037  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4038  }
4039  }
4040  }
4041  }
4042 
4043  // star refinement
4044  if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
4045  {
 // repeat from the latest best position until a pass leaves cStruct.uiBestDistance at 0
4046  while ( cStruct.uiBestDistance > 0 )
4047  {
4048  iStartX = cStruct.iBestX;
4049  iStartY = cStruct.iBestY;
4050  cStruct.uiBestDistance = 0;
4051  cStruct.ucPointNr = 0;
4052  for ( iDist = 1; iDist < (Int)uiSearchRange + 1; iDist*=2 )
4053  {
4054  if ( bStarRefinementDiamond == 1 )
4055  {
4056  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4057  }
4058  else
4059  {
4060  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4061  }
4062  if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
4063  {
4064  break;
4065  }
4066  }
4067 
4068  // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4069  if ( cStruct.uiBestDistance == 1 )
4070  {
4071  cStruct.uiBestDistance = 0;
4072  if ( cStruct.ucPointNr != 0 )
4073  {
4074  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4075  }
4076  }
4077  }
4078  }
4079 
4080  // write out best match
 // the reported value excludes the getCost() term of the best position
4081  rcMv.set( cStruct.iBestX, cStruct.iBestY );
4082  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCost( cStruct.iBestX, cStruct.iBestY );
4083 }
4084 
4085 
// NOTE(review): original line 4086 (function name and first parameter) and line 4096 are absent
// from this listing. The fast-search dispatcher calls xTZSearchSelective( pcCU, pcPatternKey, ... )
// with exactly this parameter list, so this is presumably
// "Void TEncSearch::xTZSearchSelective( TComDataCU* pcCU," - confirm. uiSearchRange,
// uiSearchRangeInitial, uiSearchStep, iMVDistThresh and the b* switches used below have no
// visible declaration; presumably line 4096 introduces them - confirm.
//
// Fast integer motion search with a coarse initial grid. Start candidates are scored with
// xTZSearchHelp(); a grid around the best candidate is then probed. Depending on how far the
// best position moved, either a full scan of the window or a star refinement follows.
//
//   pcPatternKey         pattern passed on to every search helper.
//   piRefY, iRefStride   reference samples and their stride, stored in cStruct.
//   pcMvSrchRngLT/RB     search window; passed on to the diamond/square/2-point helpers.
//   rcMv                 [in/out] start vector on entry (clipped, then >> 2);
//                        best position (cStruct.iBestX, cStruct.iBestY) on exit.
//   ruiSAD               [out] cStruct.uiBestSad minus getCost() of the best position.
//   pIntegerMv2Nx2NPred  optional extra start candidate; ignored when 0.
4087  TComPattern* pcPatternKey,
4088  Pel* piRefY,
4089  Int iRefStride,
4090  TComMv* pcMvSrchRngLT,
4091  TComMv* pcMvSrchRngRB,
4092  TComMv &rcMv,
4093  Distortion &ruiSAD,
4094  const TComMv* pIntegerMv2Nx2NPred )
4095 {
4097 
4098  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
4099  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
4100  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
4101  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
4102  Int iFirstSrchRngHorLeft = 0;
4103  Int iFirstSrchRngHorRight = 0;
4104  Int iFirstSrchRngVerTop = 0;
4105  Int iFirstSrchRngVerBottom = 0;
4106  Int iStartX = 0;
4107  Int iStartY = 0;
4108  Int iBestX = 0;
4109  Int iBestY = 0;
4110  Int iDist = 0;
4111 
4112  pcCU->clipMv( rcMv );
4113  rcMv >>= 2;
4114  // init TZSearchStruct
4115  IntTZSearchStruct cStruct;
4116  cStruct.iYStride = iRefStride;
4117  cStruct.piRefY = piRefY;
4118  cStruct.uiBestSad = MAX_UINT;
4119  cStruct.iBestX = 0;
4120  cStruct.iBestY = 0;
4121 
4122 
4123  // set rcMv (Median predictor) as start point and as best point
4124  xTZSearchHelp( pcPatternKey, cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 );
4125 
4126  // test whether one of PRED_A, PRED_B, PRED_C MV is better start point than Median predictor
4127  if ( bTestOtherPredictedMV )
4128  {
4129  for ( UInt index = 0; index < NUM_MV_PREDICTORS; index++ )
4130  {
4131  TComMv cMv = m_acMvPredictors[index];
4132  pcCU->clipMv( cMv );
4133  cMv >>= 2;
4134  xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 );
4135  }
4136  }
4137 
4138  // test whether zero Mv is better start point than Median predictor
4139  if ( bTestZeroVector )
4140  {
4141  xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
4142  }
4143 
 // optional extra candidate supplied by the caller: scale up, clip, scale back down, then test
4144  if ( pIntegerMv2Nx2NPred != 0 )
4145  {
4146  TComMv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
4147  integerMv2Nx2NPred <<= 2;
4148  pcCU->clipMv( integerMv2Nx2NPred );
4149  integerMv2Nx2NPred >>= 2;
4150  xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
4151 
4152  // reset search range
 // The window is recomputed around the current best position. Only the local iSrchRng*
 // bounds are updated; the diamond/square/2-point helpers still receive the caller's
 // pcMvSrchRngLT / pcMvSrchRngRB.
4153  TComMv cMvSrchRngLT;
4154  TComMv cMvSrchRngRB;
4155  Int iSrchRng = m_iSearchRange;
4156  TComMv currBestMv(cStruct.iBestX, cStruct.iBestY );
4157  currBestMv <<= 2;
4158  xSetSearchRange( pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB );
4159  iSrchRngHorLeft = cMvSrchRngLT.getHor();
4160  iSrchRngHorRight = cMvSrchRngRB.getHor();
4161  iSrchRngVerTop = cMvSrchRngLT.getVer();
4162  iSrchRngVerBottom = cMvSrchRngRB.getVer();
4163  }
4164 
4165  // Initial search
 // remember the best start candidate, then build a sub-window of +/- uiSearchRangeInitial
 // around it, clamped to the search window
4166  iBestX = cStruct.iBestX;
4167  iBestY = cStruct.iBestY;
4168  iFirstSrchRngHorLeft = ((iBestX - uiSearchRangeInitial) > iSrchRngHorLeft) ? (iBestX - uiSearchRangeInitial) : iSrchRngHorLeft;
4169  iFirstSrchRngVerTop = ((iBestY - uiSearchRangeInitial) > iSrchRngVerTop) ? (iBestY - uiSearchRangeInitial) : iSrchRngVerTop;
4170  iFirstSrchRngHorRight = ((iBestX + uiSearchRangeInitial) < iSrchRngHorRight) ? (iBestX + uiSearchRangeInitial) : iSrchRngHorRight;
4171  iFirstSrchRngVerBottom = ((iBestY + uiSearchRangeInitial) < iSrchRngVerBottom) ? (iBestY + uiSearchRangeInitial) : iSrchRngVerBottom;
4172 
 // probe the sub-window on a grid with step uiSearchStep; every grid point is tested itself
 // and with diamond patterns at distance 1 and 2
4173  for ( iStartY = iFirstSrchRngVerTop; iStartY <= iFirstSrchRngVerBottom; iStartY += uiSearchStep )
4174  {
4175  for ( iStartX = iFirstSrchRngHorLeft; iStartX <= iFirstSrchRngHorRight; iStartX += uiSearchStep )
4176  {
4177  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, 0 );
4178  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, 1 );
4179  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, 2 );
4180  }
4181  }
4182 
 // despite its name this is a 0/1 flag: set when the best position moved by more than
 // iMVDistThresh from the remembered start candidate along either axis
4183  Int iMaxMVDistToPred = (abs(cStruct.iBestX - iBestX) > iMVDistThresh || abs(cStruct.iBestY - iBestY) > iMVDistThresh);
4184 
4185  //full search with early exit if MV is distant from predictors
 // every position of the window is tested (step 1 in both directions)
4186  if ( bEnableRasterSearch && (iMaxMVDistToPred || bAlwaysRasterSearch) )
4187  {
4188  for ( iStartY = iSrchRngVerTop; iStartY <= iSrchRngVerBottom; iStartY += 1 )
4189  {
4190  for ( iStartX = iSrchRngHorLeft; iStartX <= iSrchRngHorRight; iStartX += 1 )
4191  {
4192  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, 1 );
4193  }
4194  }
4195  }
4196  //Smaller MV, refine around predictor
4197  else if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
4198  {
4199  // star refinement
 // repeat from the latest best position until a pass leaves cStruct.uiBestDistance at 0
4200  while ( cStruct.uiBestDistance > 0 )
4201  {
4202  iStartX = cStruct.iBestX;
4203  iStartY = cStruct.iBestY;
4204  cStruct.uiBestDistance = 0;
4205  cStruct.ucPointNr = 0;
4206  for ( iDist = 1; iDist < (Int)uiSearchRange + 1; iDist*=2 )
4207  {
4208  if ( bStarRefinementDiamond == 1 )
4209  {
4210  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4211  }
4212  else
4213  {
4214  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4215  }
4216  if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
4217  {
4218  break;
4219  }
4220  }
4221 
4222  // calculate only the 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4223  if ( cStruct.uiBestDistance == 1 )
4224  {
4225  cStruct.uiBestDistance = 0;
4226  if ( cStruct.ucPointNr != 0 )
4227  {
4228  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4229  }
4230  }
4231  }
4232  }
4233 
4234  // write out best match
 // the reported value excludes the getCost() term of the best position
4235  rcMv.set( cStruct.iBestX, cStruct.iBestY );
4236  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCost( cStruct.iBestX, cStruct.iBestY );
4237 
4238 }
4239 
4240 
// NOTE(review): original line 4241 is absent from this listing. The nine parameters below match
// the call "xPatternSearchFracDIF( bIsLosslessCoded, pcPatternKey, piRefY, iRefStride, &rcMv,
// cMvHalf, cMvQter, ruiCost ,bBi )" made after the integer search earlier in this file, so the
// missing line presumably reads "Void TEncSearch::xPatternSearchFracDIF(" - confirm.
//
// Fractional refinement around an integer motion vector, in two stages: a first
// xPatternRefinement() pass with step argument 2 (half-sample stage), then a second pass with
// step argument 1 (quarter-sample stage) based on the half-sample result.
//
//   bIsLosslessCoded  its negation is passed as the last argument of xPatternRefinement().
//   pcPatternKey      pattern to match; also supplies the ROI width and height.
//   piRefY            reference samples; the integer vector is applied as an offset to them.
//   iRefStride        stride of piRefY.
//   pcMvInt           integer motion vector that is refined; not modified here.
//   rcMvHalf          [out] half-sample stage vector, see the notes in the body.
//   rcMvQter          [out] quarter-sample stage vector, see the notes in the body.
//   ruiCost           [out] value returned by the second (quarter-sample) refinement.
//   biPred            forwarded to both up-sampling helpers.
4242  Bool bIsLosslessCoded,
4243  TComPattern* pcPatternKey,
4244  Pel* piRefY,
4245  Int iRefStride,
4246  TComMv* pcMvInt,
4247  TComMv& rcMvHalf,
4248  TComMv& rcMvQter,
4249  Distortion& ruiCost,
4250  Bool biPred
4251  )
4252 {
4253  // Reference pattern initialization (integer scale)
 // the ROI starts at the reference position addressed by the integer vector
4254  TComPattern cPatternRoi;
4255  Int iOffset = pcMvInt->getHor() + pcMvInt->getVer() * iRefStride;
4256  cPatternRoi.initPattern(piRefY + iOffset,
4257  pcPatternKey->getROIYWidth(),
4258  pcPatternKey->getROIYHeight(),
4259  iRefStride );
4260 
4261  // Half-pel refinement
4262  xExtDIFUpSamplingH ( &cPatternRoi, biPred );
4263 
 // NOTE(review): rcMvHalf goes in as twice the integer vector; presumably
 // xPatternRefinement() overwrites it with the selected half-sample offset - confirm.
4264  rcMvHalf = *pcMvInt; rcMvHalf <<= 1; // for mv-cost
4265  TComMv baseRefMv(0, 0);
4266  ruiCost = xPatternRefinement( pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded );
4267 
4268  m_pcRdCost->setCostScale( 0 );
4269 
 // quarter-sample stage: up-sample around the half-sample result and use twice that
 // result as the base vector
4270  xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf, biPred );
4271  baseRefMv = rcMvHalf;
4272  baseRefMv <<= 1;
4273 
 // rcMvQter goes in as ( 2 * integer vector + rcMvHalf ) * 2
4274  rcMvQter = *pcMvInt; rcMvQter <<= 1; // for mv-cost
4275  rcMvQter += rcMvHalf; rcMvQter <<= 1;
 // this overwrites the cost returned by the half-sample stage
4276  ruiCost = xPatternRefinement( pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded );
4277 }
4278 
4279 
// NOTE(review): the start of this definition (original lines 4280-4281) is absent from this
// listing, as are lines 4312-4313, 4317, 4323, 4328, 4352, 4361-4362, 4392 and 4406 inside the
// body. pcCU, pcYuvOrg, pcYuvPred and uiBits are used below without a visible declaration, so
// they presumably come from those lines. In the HM sources this is presumably
// TEncSearch::encodeResAndCalcRdInterCU - confirm against the real file before relying on
// this text.
//
// Computes bits, distortion and RD cost for an inter CU and stores them through
// pcCU->getTotalBits(), getTotalDistortion() and getTotalCost().
//
//   pcYuvResi      residual buffer: cleared in the skip path, otherwise set to
//                  pcYuvOrg - pcYuvPred.
//   pcYuvResiBest  residual added to the prediction to form the reconstruction.
//   pcYuvRec       [out] reconstruction.
//   bSkipResidual  true: mark the CU as skipped and use the prediction as reconstruction.
//                  false: estimate the residual quadtree and compare it with an all-zero residual.
4282  TComYuv* pcYuvResi, TComYuv* pcYuvResiBest, TComYuv* pcYuvRec,
4283  Bool bSkipResidual DEBUG_STRING_FN_DECLARE(sDebug) )
4284 {
4285  assert ( !pcCU->isIntra(0) );
4286 
4287  const UInt cuWidthPixels = pcCU->getWidth ( 0 );
4288  const UInt cuHeightPixels = pcCU->getHeight( 0 );
4289  const Int numValidComponents = pcCU->getPic()->getNumberValidComponents();
4290 
4291  // The pcCU is not marked as skip-mode at this point, and its m_pcTrCoeff, m_pcArlCoeff, m_puhCbf, m_puhTrIdx will all be 0.
4292  // due to prior calls to TComDataCU::initEstData( );
4293 
4294  if ( bSkipResidual ) // No residual coding : SKIP mode
4295  {
4296  pcCU->setSkipFlagSubParts( true, 0, pcCU->getDepth(0) );
4297 
4298  pcYuvResi->clear();
4299 
 // the reconstruction is the prediction itself
4300  pcYuvPred->copyToPartYuv( pcYuvRec, 0 );
4301  Distortion distortion = 0;
4302 
 // sum the distortion of all valid components; width and height are scaled per component
4303  for (Int comp=0; comp < numValidComponents; comp++)
4304  {
4305  const ComponentID compID=ComponentID(comp);
4306  const UInt csx=pcYuvOrg->getComponentScaleX(compID);
4307  const UInt csy=pcYuvOrg->getComponentScaleY(compID);
4308  distortion += m_pcRdCost->getDistPart( g_bitDepth[toChannelType(compID)], pcYuvRec->getAddr(compID), pcYuvRec->getStride(compID), pcYuvOrg->getAddr(compID),
4309  pcYuvOrg->getStride(compID), cuWidthPixels >> csx, cuHeightPixels >> csy, compID);
4310  }
4311 
 // NOTE(review): original lines 4312-4313 are not shown in this listing.
4314 
4315  if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
4316  {
 // NOTE(review): the statement of this branch (original line 4317) is not shown in this listing.
4318  }
4319 
4320  m_pcEntropyCoder->encodeSkipFlag(pcCU, 0, true);
4321  m_pcEntropyCoder->encodeMergeIndex( pcCU, 0, true );
4322 
 // NOTE(review): uiBits is presumably declared and set on the omitted line 4323 - confirm.
4324  pcCU->getTotalBits() = uiBits;
4325  pcCU->getTotalDistortion() = distortion;
4326  pcCU->getTotalCost() = m_pcRdCost->calcRdCost( uiBits, distortion );
4327 
4329 
4330 #ifdef DEBUG_STRING
4331  pcYuvResiBest->clear(); // Clear the residual image, if we didn't code it.
4332  for(UInt i=0; i<MAX_NUM_COMPONENT+1; i++)
4333  {
4334  sDebug+=debug_reorder_data_inter_token[i];
4335  }
4336 #endif
4337 
4338  return;
4339  }
4340 
4341  // Residual coding.
4342 
 // residual = original - prediction
4343  pcYuvResi->subtract( pcYuvOrg, pcYuvPred, 0, cuWidthPixels );
4344 
4345  TComTURecurse tuLevel0(pcCU, 0);
4346 
4347  Double nonZeroCost = 0;
4348  UInt nonZeroBits = 0;
4349  Distortion nonZeroDistortion = 0;
4350  Distortion zeroDistortion = 0;
4351 
4353 
4354  xEstimateInterResidualQT( pcYuvResi, nonZeroCost, nonZeroBits, nonZeroDistortion, &zeroDistortion, tuLevel0 DEBUG_STRING_PASS_INTO(sDebug) );
4355 
4356  // -------------------------------------------------------
4357  // set the coefficients in the pcCU, and also calculates the residual data.
4358  // If a block full of 0's is efficient, then just use 0's.
4359  // The costs at this point do not include header bits.
4360 
 // NOTE(review): original lines 4361-4362 are not shown in this listing; they presumably
 // produce the bit count that is read back on the next line - confirm.
4363  const UInt zeroResiBits = m_pcEntropyCoder->getNumberOfWrittenBits();
 // for a lossless-coded CU zeroCost is forced above nonZeroCost, so the cost comparison
 // below cannot select the all-zero residual
4364  const Double zeroCost = (pcCU->isLosslessCoded( 0 )) ? (nonZeroCost+1) : (m_pcRdCost->calcRdCost( zeroResiBits, zeroDistortion ));
4365 
4366  if ( zeroCost < nonZeroCost || !pcCU->getQtRootCbf(0) )
4367  {
 // all-zero residual: reset transform index, CBFs, cross-component prediction alpha and
 // transform-skip flags of the CU
4368  const UInt uiQPartNum = tuLevel0.GetAbsPartIdxNumParts();
4369  ::memset( pcCU->getTransformIdx() , 0, uiQPartNum * sizeof(UChar) );
4370  for (Int comp=0; comp < numValidComponents; comp++)
4371  {
4372  const ComponentID component = ComponentID(comp);
4373  ::memset( pcCU->getCbf( component ) , 0, uiQPartNum * sizeof(UChar) );
4374  ::memset( pcCU->getCrossComponentPredictionAlpha(component), 0, ( uiQPartNum * sizeof(Char) ) );
4375  }
4376  static const UInt useTS[MAX_NUM_COMPONENT]={0,0,0};
4377  pcCU->setTransformSkipSubParts ( useTS, 0, pcCU->getDepth(0) );
4378 #ifdef DEBUG_STRING
4379  sDebug.clear();
4380  for(UInt i=0; i<MAX_NUM_COMPONENT+1; i++)
4381  {
4382  sDebug+=debug_reorder_data_inter_token[i];
4383  }
4384 #endif
4385  }
4386  else
4387  {
4388  xSetInterResidualQTData( NULL, false, tuLevel0); // Call first time to set coefficients.
4389  }
4390 
4391  // all decisions now made. Fully encode the CU, including the headers:
 // NOTE(review): original line 4392 is not shown in this listing.
4393 
4394  UInt finalBits = 0;
4395  xAddSymbolBitsInter( pcCU, 0, 0, finalBits );
4396  // we've now encoded the pcCU, and so have a valid bit cost
4397 
4398  if ( !pcCU->getQtRootCbf( 0 ) )
4399  {
4400  pcYuvResiBest->clear(); // Clear the residual image, if we didn't code it.
4401  }
4402  else
4403  {
4404  xSetInterResidualQTData( pcYuvResiBest, true, tuLevel0 ); // else set the residual image data pcYUVResiBest from the various temp images.
4405  }
4407 
 // reconstruction = prediction + best residual
4408  pcYuvRec->addClip ( pcYuvPred, pcYuvResiBest, 0, cuWidthPixels );
4409 
4410  // update with clipped distortion and cost (previously unclipped reconstruction values were used)
4411 
4412  Distortion finalDistortion = 0;
4413  for(Int comp=0; comp<numValidComponents; comp++)
4414  {
4415  const ComponentID compID=ComponentID(comp);
4416  finalDistortion += m_pcRdCost->getDistPart( g_bitDepth[toChannelType(compID)], pcYuvRec->getAddr(compID ), pcYuvRec->getStride(compID ), pcYuvOrg->getAddr(compID ), pcYuvOrg->getStride(compID), cuWidthPixels >> pcYuvOrg->getComponentScaleX(compID), cuHeightPixels >> pcYuvOrg->getComponentScaleY(compID), compID);
4417  }
4418 
4419  pcCU->getTotalBits() = finalBits;
4420  pcCU->getTotalDistortion() = finalDistortion;
4421  pcCU->getTotalCost() = m_pcRdCost->calcRdCost( finalBits, finalDistortion );
4422 }
4423 
4424 
4425 
4427  Double &rdCost,
4428  UInt &ruiBits,
4429  Distortion &ruiDist,
4430  Distortion *puiZeroDist,
4431  TComTU &rTu
4432  DEBUG_STRING_FN_DECLARE(sDebug) )
4433 {
4434  TComDataCU *pcCU = rTu.getCU();
4435  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
4436  const UInt uiDepth = rTu.GetTransformDepthTotal();
4437  const UInt uiTrMode = rTu.GetTransformDepthRel();
4438  const UInt subTUDepth = uiTrMode + 1;
4439  const UInt numValidComp = pcCU->getPic()->getNumberValidComponents();
4440  DEBUG_STRING_NEW(sSingleStringComp[MAX_NUM_COMPONENT])
4441 
4442  assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) );
4443  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
4444 
4445  UInt SplitFlag = ((pcCU->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && pcCU->isInter(uiAbsPartIdx) && ( pcCU->getPartitionSize(uiAbsPartIdx) != SIZE_2Nx2N ));
4446 #ifdef DEBUG_STRING
4447  const Int debugPredModeMask = DebugStringGetPredModeMask(pcCU->getPredictionMode(uiAbsPartIdx));
4448 #endif
4449 
4450  Bool bCheckFull;
4451 
4452  if ( SplitFlag && uiDepth == pcCU->getDepth(uiAbsPartIdx) && ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ) )
4453  {
4454  bCheckFull = false;
4455  }
4456  else
4457  {
4458  bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() );
4459  }
4460 
4461  const Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );
4462 
4463  assert( bCheckFull || bCheckSplit );
4464 
4465  // code full block
4466  Double dSingleCost = MAX_DOUBLE;
4467  UInt uiSingleBits = 0;
4468  Distortion uiSingleDistComp [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4469  Distortion uiSingleDist = 0;
4470  TCoeff uiAbsSum [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4471  UInt uiBestTransformMode [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4472  // Stores the best explicit RDPCM mode for a TU encoded without split
4473  UInt bestExplicitRdpcmModeUnSplit[MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{3,3}, {3,3}, {3,3}};
4474  Char bestCrossCPredictionAlpha [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4475 
4477 
4478  if( bCheckFull )
4479  {
4480  Double minCost[MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/];
4481  Bool checkTransformSkip[MAX_NUM_COMPONENT];
4482  pcCU->setTrIdxSubParts( uiTrMode, uiAbsPartIdx, uiDepth );
4483 
4485 
4486  memset( m_pTempPel, 0, sizeof( Pel ) * rTu.getRect(COMPONENT_Y).width * rTu.getRect(COMPONENT_Y).height ); // not necessary needed for inside of recursion (only at the beginning)
4487 
4488  const UInt uiQTTempAccessLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
4489  TCoeff *pcCoeffCurr[MAX_NUM_COMPONENT];
4490 #if ADAPTIVE_QP_SELECTION
4491  TCoeff *pcArlCoeffCurr[MAX_NUM_COMPONENT];
4492 #endif
4493 
4494  for(UInt i=0; i<numValidComp; i++)
4495  {
4496  minCost[i][0] = MAX_DOUBLE;
4497  minCost[i][1] = MAX_DOUBLE;
4498  }
4499 
4500  Pel crossCPredictedResidualBuffer[ MAX_TU_SIZE * MAX_TU_SIZE ];
4501 
4502  for(UInt i=0; i<numValidComp; i++)
4503  {
4504  checkTransformSkip[i]=false;
4505  const ComponentID compID=ComponentID(i);
4506  pcCoeffCurr[compID] = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID);
4507 #if ADAPTIVE_QP_SELECTION
4508  pcArlCoeffCurr[compID] = m_ppcQTTempArlCoeff[compID ][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID);
4509 #endif
4510 
4511  if(rTu.ProcessComponentSection(compID))
4512  {
4513  const QpParam cQP(*pcCU, compID);
4514 
4515  checkTransformSkip[compID] = pcCU->getSlice()->getPPS()->getUseTransformSkip() &&
4517  (!pcCU->isLosslessCoded(0));
4518 
4519  const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height;
4520 
4521  TComTURecurse TUIterator(rTu, false, (splitIntoSubTUs ? TComTU::VERTICAL_SPLIT : TComTU::DONT_SPLIT), true, compID);
4522 
4523  const UInt partIdxesPerSubTU = TUIterator.GetAbsPartIdxNumParts(compID);
4524 
4525  do
4526  {
4527  const UInt subTUIndex = TUIterator.GetSectionNumber();
4528  const UInt subTUAbsPartIdx = TUIterator.GetAbsPartIdxTU(compID);
4529  const TComRectangle &tuCompRect = TUIterator.getRect(compID);
4530  const UInt subTUBufferOffset = tuCompRect.width * tuCompRect.height * subTUIndex;
4531 
4532  TCoeff *currentCoefficients = pcCoeffCurr[compID] + subTUBufferOffset;
4533 #if ADAPTIVE_QP_SELECTION
4534  TCoeff *currentARLCoefficients = pcArlCoeffCurr[compID] + subTUBufferOffset;
4535 #endif
4536  const Bool isCrossCPredictionAvailable = isChroma(compID)
4538  && (pcCU->getCbf(subTUAbsPartIdx, COMPONENT_Y, uiTrMode) != 0);
4539 
4540  Char preCalcAlpha = 0;
4541  const Pel *pLumaResi = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( COMPONENT_Y, rTu.getRect( COMPONENT_Y ).x0, rTu.getRect( COMPONENT_Y ).y0 );
4542 
4543  if (isCrossCPredictionAvailable)
4544  {
4545  const Bool bUseReconstructedResidualForEstimate = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate();
4546  const Pel *const lumaResidualForEstimate = bUseReconstructedResidualForEstimate ? pLumaResi : pcResi->getAddrPix(COMPONENT_Y, tuCompRect.x0, tuCompRect.y0);
4547  const UInt lumaResidualStrideForEstimate = bUseReconstructedResidualForEstimate ? m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y) : pcResi->getStride(COMPONENT_Y);
4548 
4549  preCalcAlpha = xCalcCrossComponentPredictionAlpha(TUIterator,
4550  compID,
4551  lumaResidualForEstimate,
4552  pcResi->getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4553  tuCompRect.width,
4554  tuCompRect.height,
4555  lumaResidualStrideForEstimate,
4556  pcResi->getStride(compID));
4557  }
4558 
4559  const Int transformSkipModesToTest = checkTransformSkip[compID] ? 2 : 1;
4560  const Int crossCPredictionModesToTest = (preCalcAlpha != 0) ? 2 : 1; // preCalcAlpha cannot be anything other than 0 if isCrossCPredictionAvailable is false
4561 
4562  const Bool isOneMode = (crossCPredictionModesToTest == 1) && (transformSkipModesToTest == 1);
4563 
4564  for (Int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++)
4565  {
4566  pcCU->setTransformSkipPartRange(transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU);
4567 
4568  for (Int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
4569  {
4570  const Bool isFirstMode = (transformSkipModeId == 0) && (crossCPredictionModeId == 0);
4571  const Bool bUseCrossCPrediction = crossCPredictionModeId != 0;
4572 
4573  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
4575 
4576  pcCU->setTransformSkipPartRange(transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU);
4577  pcCU->setCrossComponentPredictionAlphaPartRange((bUseCrossCPrediction ? preCalcAlpha : 0), compID, subTUAbsPartIdx, partIdxesPerSubTU );
4578 
4579  if ((compID != COMPONENT_Cr) && ((transformSkipModeId == 1) ? m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ()))
4580  {
4582  }
4583 
4584 #if RDOQ_CHROMA_LAMBDA
4585  m_pcTrQuant->selectLambda(compID);
4586 #endif
4587 
4588  Pel *pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0);
4589  UInt resiStride = m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID);
4590 
4591  TCoeff bestCoeffComp [MAX_TU_SIZE*MAX_TU_SIZE];
4592  Pel bestResiComp [MAX_TU_SIZE*MAX_TU_SIZE];
4593 
4594 #if ADAPTIVE_QP_SELECTION
4595  TCoeff bestArlCoeffComp[MAX_TU_SIZE*MAX_TU_SIZE];
4596 #endif
4597  TCoeff currAbsSum = 0;
4598  UInt currCompBits = 0;
4599  Distortion currCompDist = 0;
4600  Double currCompCost = 0;
4601  UInt nonCoeffBits = 0;
4602  Distortion nonCoeffDist = 0;
4603  Double nonCoeffCost = 0;
4604 
4605  if(!isOneMode && !isFirstMode)
4606  {
4607  memcpy(bestCoeffComp, currentCoefficients, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4608 #if ADAPTIVE_QP_SELECTION
4609  memcpy(bestArlCoeffComp, currentARLCoefficients, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4610 #endif
4611  for(Int y = 0; y < tuCompRect.height; y++)
4612  {
4613  memcpy(&bestResiComp[y * tuCompRect.width], (pcResiCurrComp + (y * resiStride)), (sizeof(Pel) * tuCompRect.width));
4614  }
4615  }
4616 
4617  if (bUseCrossCPrediction)
4618  {
4620  compID,
4621  pLumaResi,
4622  pcResi->getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4623  crossCPredictedResidualBuffer,
4624  tuCompRect.width,
4625  tuCompRect.height,
4626  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
4627  pcResi->getStride(compID),
4628  tuCompRect.width,
4629  false);
4630 
4631  m_pcTrQuant->transformNxN(TUIterator, compID, crossCPredictedResidualBuffer, tuCompRect.width, currentCoefficients,
4633  currentARLCoefficients,
4634 #endif
4635  currAbsSum, cQP);
4636  }
4637  else
4638  {
4639  m_pcTrQuant->transformNxN(TUIterator, compID, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), currentCoefficients,
4640 #if ADAPTIVE_QP_SELECTION
4641  currentARLCoefficients,
4642 #endif
4643  currAbsSum, cQP);
4644  }
4645 
4646  if(isFirstMode || (currAbsSum == 0))
4647  {
4648  if (bUseCrossCPrediction)
4649  {
4651  compID,
4652  pLumaResi,
4653  m_pTempPel,
4654  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4655  tuCompRect.width,
4656  tuCompRect.height,
4657  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
4658  tuCompRect.width,
4659  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID),
4660  true);
4661 
4662  nonCoeffDist = m_pcRdCost->getDistPart( g_bitDepth[toChannelType(compID)], m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4663  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride( compID ), pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4664  pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); // initialized with zero residual destortion
4665  }
4666  else
4667  {
4668  nonCoeffDist = m_pcRdCost->getDistPart( g_bitDepth[toChannelType(compID)], m_pTempPel, tuCompRect.width, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4669  pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); // initialized with zero residual destortion
4670  }
4671 
4672  m_pcEntropyCoder->encodeQtCbfZero( TUIterator, toChannelType(compID) );
4673 
4674  if ( isCrossCPredictionAvailable )
4675  {
4676  m_pcEntropyCoder->encodeCrossComponentPrediction( TUIterator, compID );
4677  }
4678 
4679  nonCoeffBits = m_pcEntropyCoder->getNumberOfWrittenBits();
4680  nonCoeffCost = m_pcRdCost->calcRdCost( nonCoeffBits, nonCoeffDist );
4681  }
4682 
4683  if((puiZeroDist != NULL) && isFirstMode)
4684  {
4685  *puiZeroDist += nonCoeffDist; // initialized with zero residual destortion
4686  }
4687 
4688  DEBUG_STRING_NEW(sSingleStringTest)
4689 
4690  if( currAbsSum > 0 ) //if non-zero coefficients are present, a residual needs to be derived for further prediction
4691  {
4692  if (isFirstMode)
4693  {
4694  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
4696  }
4697 
4698  m_pcEntropyCoder->encodeQtCbf( TUIterator, compID, true );
4699 
4700  if (isCrossCPredictionAvailable)
4701  {
4702  m_pcEntropyCoder->encodeCrossComponentPrediction( TUIterator, compID );
4703  }
4704 
4705  m_pcEntropyCoder->encodeCoeffNxN( TUIterator, currentCoefficients, compID );
4706  currCompBits = m_pcEntropyCoder->getNumberOfWrittenBits();
4707 
4708  pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 );
4709 
4710  m_pcTrQuant->invTransformNxN( TUIterator, compID, pcResiCurrComp, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), currentCoefficients, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sSingleStringTest, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) );
4711 
4712  if (bUseCrossCPrediction)
4713  {
4715  compID,
4716  pLumaResi,
4717  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4718  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4719  tuCompRect.width,
4720  tuCompRect.height,
4721  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
4722  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID ),
4723  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID ),
4724  true);
4725  }
4726 
4727  currCompDist = m_pcRdCost->getDistPart( g_bitDepth[toChannelType(compID)], m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4728  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID),
4729  pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4730  pcResi->getStride(compID),
4731  tuCompRect.width, tuCompRect.height, compID);
4732 
4733  currCompCost = m_pcRdCost->calcRdCost(currCompBits, currCompDist);
4734 
4735  if (pcCU->isLosslessCoded(0))
4736  {
4737  nonCoeffCost = MAX_DOUBLE;
4738  }
4739  }
4740  else if ((transformSkipModeId == 1) && !bUseCrossCPrediction)
4741  {
4742  currCompCost = MAX_DOUBLE;
4743  }
4744  else
4745  {
4746  currCompBits = nonCoeffBits;
4747  currCompDist = nonCoeffDist;
4748  currCompCost = nonCoeffCost;
4749  }
4750 
4751  // evaluate
4752  if ((currCompCost < minCost[compID][subTUIndex]) || ((transformSkipModeId == 1) && (currCompCost == minCost[compID][subTUIndex])))
4753  {
4754  bestExplicitRdpcmModeUnSplit[compID][subTUIndex] = pcCU->getExplicitRdpcmMode(compID, subTUAbsPartIdx);
4755 
4756  if(isFirstMode) //check for forced null
4757  {
4758  if((nonCoeffCost < currCompCost) || (currAbsSum == 0))
4759  {
4760  memset(currentCoefficients, 0, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4761 
4762  currAbsSum = 0;
4763  currCompBits = nonCoeffBits;
4764  currCompDist = nonCoeffDist;
4765  currCompCost = nonCoeffCost;
4766  }
4767  }
4768 
4769 #ifdef DEBUG_STRING
4770  if (currAbsSum > 0)
4771  {
4772  DEBUG_STRING_SWAP(sSingleStringComp[compID], sSingleStringTest)
4773  }
4774  else
4775  {
4776  sSingleStringComp[compID].clear();
4777  }
4778 #endif
4779 
4780  uiAbsSum [compID][subTUIndex] = currAbsSum;
4781  uiSingleDistComp [compID][subTUIndex] = currCompDist;
4782  minCost [compID][subTUIndex] = currCompCost;
4783  uiBestTransformMode [compID][subTUIndex] = transformSkipModeId;
4784  bestCrossCPredictionAlpha[compID][subTUIndex] = (crossCPredictionModeId == 1) ? pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) : 0;
4785 
4786  if (uiAbsSum[compID][subTUIndex] == 0)
4787  {
4788  if (bUseCrossCPrediction)
4789  {
4791  compID,
4792  pLumaResi,
4793  m_pTempPel,
4794  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4795  tuCompRect.width,
4796  tuCompRect.height,
4797  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
4798  tuCompRect.width,
4799  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID),
4800  true);
4801  }
4802  else
4803  {
4804  pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0);
4805  const UInt uiStride = m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID);
4806  for(UInt uiY = 0; uiY < tuCompRect.height; uiY++)
4807  {
4808  memset(pcResiCurrComp, 0, (sizeof(Pel) * tuCompRect.width));
4809  pcResiCurrComp += uiStride;
4810  }
4811  }
4812  }
4813  }
4814  else
4815  {
4816  // reset
4817  memcpy(currentCoefficients, bestCoeffComp, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4818 #if ADAPTIVE_QP_SELECTION
4819  memcpy(currentARLCoefficients, bestArlCoeffComp, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4820 #endif
4821  for (Int y = 0; y < tuCompRect.height; y++)
4822  {
4823  memcpy((pcResiCurrComp + (y * resiStride)), &bestResiComp[y * tuCompRect.width], (sizeof(Pel) * tuCompRect.width));
4824  }
4825  }
4826  }
4827  }
4828 
4829  pcCU->setExplicitRdpcmModePartRange ( bestExplicitRdpcmModeUnSplit[compID][subTUIndex], compID, subTUAbsPartIdx, partIdxesPerSubTU);
4830  pcCU->setTransformSkipPartRange ( uiBestTransformMode [compID][subTUIndex], compID, subTUAbsPartIdx, partIdxesPerSubTU );
4831  pcCU->setCbfPartRange ((((uiAbsSum [compID][subTUIndex] > 0) ? 1 : 0) << uiTrMode), compID, subTUAbsPartIdx, partIdxesPerSubTU );
4832  pcCU->setCrossComponentPredictionAlphaPartRange( bestCrossCPredictionAlpha [compID][subTUIndex], compID, subTUAbsPartIdx, partIdxesPerSubTU );
4833  } while (TUIterator.nextSection(rTu)); //end of sub-TU loop
4834  } // processing section
4835  } // component loop
4836 
4837  for(UInt ch = 0; ch < numValidComp; ch++)
4838  {
4839  const ComponentID compID = ComponentID(ch);
4840  if (rTu.ProcessComponentSection(compID) && (rTu.getRect(compID).width != rTu.getRect(compID).height))
4841  {
4842  offsetSubTUCBFs(rTu, compID); //the CBFs up to now have been defined for two sub-TUs - shift them down a level and replace with the parent level CBF
4843  }
4844  }
4845 
4846  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
4848 
4849  if( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) )
4850  {
4851  m_pcEntropyCoder->encodeTransformSubdivFlag( 0, 5 - uiLog2TrSize );
4852  }
4853 
4854  for(UInt ch = 0; ch < numValidComp; ch++)
4855  {
4856  const UInt chOrderChange = ((ch + 1) == numValidComp) ? 0 : (ch + 1);
4857  const ComponentID compID=ComponentID(chOrderChange);
4858  if( rTu.ProcessComponentSection(compID) )
4859  {
4860  m_pcEntropyCoder->encodeQtCbf( rTu, compID, true );
4861  }
4862  }
4863 
4864  for(UInt ch = 0; ch < numValidComp; ch++)
4865  {
4866  const ComponentID compID=ComponentID(ch);
4867  if (rTu.ProcessComponentSection(compID))
4868  {
4869  if(isChroma(compID) && (uiAbsSum[COMPONENT_Y][0] != 0))
4870  {
4872  }
4873 
4874  m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeffCurr[compID], compID );
4875  for (UInt subTUIndex = 0; subTUIndex < 2; subTUIndex++)
4876  {
4877  uiSingleDist += uiSingleDistComp[compID][subTUIndex];
4878  }
4879  }
4880  }
4881 
4882  uiSingleBits = m_pcEntropyCoder->getNumberOfWrittenBits();
4883 
4884  dSingleCost = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDist );
4885  } // check full
4886 
4887  // code sub-blocks
4888  if( bCheckSplit )
4889  {
4890  if( bCheckFull )
4891  {
4893  m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
4894  }
4895  Distortion uiSubdivDist = 0;
4896  UInt uiSubdivBits = 0;
4897  Double dSubdivCost = 0.0;
4898 
4899  //save the non-split CBFs in case we need to restore them later
4900 
4901  UInt bestCBF [MAX_NUM_COMPONENT];
4902  UInt bestsubTUCBF[MAX_NUM_COMPONENT][2];
4903  for(UInt ch = 0; ch < numValidComp; ch++)
4904  {
4905  const ComponentID compID=ComponentID(ch);
4906 
4907  if (rTu.ProcessComponentSection(compID))
4908  {
4909  bestCBF[compID] = pcCU->getCbf(uiAbsPartIdx, compID, uiTrMode);
4910 
4911  const TComRectangle &tuCompRect = rTu.getRect(compID);
4912  if (tuCompRect.width != tuCompRect.height)
4913  {
4914  const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> 1;
4915 
4916  for (UInt subTU = 0; subTU < 2; subTU++)
4917  {
4918  bestsubTUCBF[compID][subTU] = pcCU->getCbf ((uiAbsPartIdx + (subTU * partIdxesPerSubTU)), compID, subTUDepth);
4919  }
4920  }
4921  }
4922  }
4923 
4924 
4925  TComTURecurse tuRecurseChild(rTu, false);
4926  const UInt uiQPartNumSubdiv = tuRecurseChild.GetAbsPartIdxNumParts();
4927 
4928  DEBUG_STRING_NEW(sSplitString[MAX_NUM_COMPONENT])
4929 
4930  do
4931  {
4932  DEBUG_STRING_NEW(childString)
4933  xEstimateInterResidualQT( pcResi, dSubdivCost, uiSubdivBits, uiSubdivDist, bCheckFull ? NULL : puiZeroDist, tuRecurseChild DEBUG_STRING_PASS_INTO(childString));
4934 #ifdef DEBUG_STRING
4935  // split the string by component and append to the relevant output (because decoder decodes in channel order, whereas this search searches by TU-order)
4936  std::size_t lastPos=0;
4937  const std::size_t endStrng=childString.find(debug_reorder_data_inter_token[MAX_NUM_COMPONENT], lastPos);
4938  for(UInt ch = 0; ch < numValidComp; ch++)
4939  {
4940  if (lastPos!=std::string::npos && childString.find(debug_reorder_data_inter_token[ch], lastPos)==lastPos)
4941  {
4942  lastPos+=strlen(debug_reorder_data_inter_token[ch]); // skip leading string
4943  }
4944  std::size_t pos=childString.find(debug_reorder_data_inter_token[ch+1], lastPos);
4945  if (pos!=std::string::npos && pos>endStrng)
4946  {
4947  lastPos=endStrng;
4948  }
4949  sSplitString[ch]+=childString.substr(lastPos, (pos==std::string::npos)? std::string::npos : (pos-lastPos) );
4950  lastPos=pos;
4951  }
4952 #endif
4953  } while ( tuRecurseChild.nextSection(rTu) ) ;
4954 
4955  UInt uiCbfAny=0;
4956  for(UInt ch = 0; ch < numValidComp; ch++)
4957  {
4958  UInt uiYUVCbf = 0;
4959  for( UInt ui = 0; ui < 4; ++ui )
4960  {
4961  uiYUVCbf |= pcCU->getCbf( uiAbsPartIdx + ui * uiQPartNumSubdiv, ComponentID(ch), uiTrMode + 1 );
4962  }
4963  UChar *pBase=pcCU->getCbf( ComponentID(ch) );
4964  const UInt flags=uiYUVCbf << uiTrMode;
4965  for( UInt ui = 0; ui < 4 * uiQPartNumSubdiv; ++ui )
4966  {
4967  pBase[uiAbsPartIdx + ui] |= flags;
4968  }
4969  uiCbfAny|=uiYUVCbf;
4970  }
4971 
4972  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
4974 
4975  // when compID isn't a channel, code Cbfs:
4976  xEncodeInterResidualQT( MAX_NUM_COMPONENT, rTu );
4977  for(UInt ch = 0; ch < numValidComp; ch++)
4978  {
4979  xEncodeInterResidualQT( ComponentID(ch), rTu );
4980  }
4981 
4982  uiSubdivBits = m_pcEntropyCoder->getNumberOfWrittenBits();
4983  dSubdivCost = m_pcRdCost->calcRdCost( uiSubdivBits, uiSubdivDist );
4984 
4985  if (!bCheckFull || (uiCbfAny && (dSubdivCost < dSingleCost)))
4986  {
4987  rdCost += dSubdivCost;
4988  ruiBits += uiSubdivBits;
4989  ruiDist += uiSubdivDist;
4990 #ifdef DEBUG_STRING
4991  for(UInt ch = 0; ch < numValidComp; ch++)
4992  {
4993  DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[ch])
4994  DEBUG_STRING_APPEND(sDebug, sSplitString[ch])
4995  }
4996 #endif
4997  }
4998  else
4999  {
5000  rdCost += dSingleCost;
5001  ruiBits += uiSingleBits;
5002  ruiDist += uiSingleDist;
5003 
5004  //restore state to unsplit
5005 
5006  pcCU->setTrIdxSubParts( uiTrMode, uiAbsPartIdx, uiDepth );
5007 
5008  for(UInt ch = 0; ch < numValidComp; ch++)
5009  {
5010  const ComponentID compID=ComponentID(ch);
5011 
5012  DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[ch])
5013  if (rTu.ProcessComponentSection(compID))
5014  {
5015  DEBUG_STRING_APPEND(sDebug, sSingleStringComp[compID])
5016 
5017  const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height;
5018  const UInt numberOfSections = splitIntoSubTUs ? 2 : 1;
5019  const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> (splitIntoSubTUs ? 1 : 0);
5020 
5021  for (UInt subTUIndex = 0; subTUIndex < numberOfSections; subTUIndex++)
5022  {
5023  const UInt uisubTUPartIdx = uiAbsPartIdx + (subTUIndex * partIdxesPerSubTU);
5024 
5025  if (splitIntoSubTUs)
5026  {
5027  const UChar combinedCBF = (bestsubTUCBF[compID][subTUIndex] << subTUDepth) | (bestCBF[compID] << uiTrMode);
5028  pcCU->setCbfPartRange(combinedCBF, compID, uisubTUPartIdx, partIdxesPerSubTU);
5029  }
5030  else
5031  {
5032  pcCU->setCbfPartRange((bestCBF[compID] << uiTrMode), compID, uisubTUPartIdx, partIdxesPerSubTU);
5033  }
5034 
5035  pcCU->setCrossComponentPredictionAlphaPartRange(bestCrossCPredictionAlpha[compID][subTUIndex], compID, uisubTUPartIdx, partIdxesPerSubTU);
5036  pcCU->setTransformSkipPartRange(uiBestTransformMode[compID][subTUIndex], compID, uisubTUPartIdx, partIdxesPerSubTU);
5037  pcCU->setExplicitRdpcmModePartRange(bestExplicitRdpcmModeUnSplit[compID][subTUIndex], compID, uisubTUPartIdx, partIdxesPerSubTU);
5038  }
5039  }
5040  }
5041 
5043  }
5044  }
5045  else
5046  {
5047  rdCost += dSingleCost;
5048  ruiBits += uiSingleBits;
5049  ruiDist += uiSingleDist;
5050 #ifdef DEBUG_STRING
5051  for(UInt ch = 0; ch < numValidComp; ch++)
5052  {
5053  const ComponentID compID=ComponentID(ch);
5054  DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[compID])
5055 
5056  if (rTu.ProcessComponentSection(compID))
5057  {
5058  DEBUG_STRING_APPEND(sDebug, sSingleStringComp[compID])
5059  }
5060  }
5061 #endif
5062  }
5063  DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[MAX_NUM_COMPONENT])
5064 }
5065 
5066 
5067 
5069 {
      // NOTE(review): the header line for this body is not visible in this listing (the embedded
      // numbering jumps from 5067 to 5069). The recursive call near the end of the body suggests
      // this is xEncodeInterResidualQT( compID, rTu ) - confirm against the original file.
      //
      // The body runs in one of two modes, selected by compID:
      //  - compID == MAX_NUM_COMPONENT: code the transform-subdivision flag and the CBFs of this
      //    TU, then recurse into the child TUs if the TU is subdivided.
      //  - any other compID: code the coefficients of that component once a non-subdivided TU is
      //    reached; subdivided TUs are only descended into when the component's CBF is set.
5070  TComDataCU* pcCU=rTu.getCU();
5071  const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
5072  const UInt uiCurrTrMode = rTu.GetTransformDepthRel();
5073  assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) );
5074  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
5075 
      // The TU is treated as subdivided when the depth of rTu differs from the transform index
      // stored in the CU for this partition.
5076  const Bool bSubdiv = uiCurrTrMode != uiTrMode;
5077 
5078  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
5079 
5080  if (compID==MAX_NUM_COMPONENT) // we are not processing a channel, instead we always recurse and code the CBFs
5081  {
      // The subdivision flag is written only when the luma TU size is no larger than the SPS
      // maximum and strictly larger than the minimum for this CU; otherwise it is not coded here.
5082  if( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) )
5083  {
5084  m_pcEntropyCoder->encodeTransformSubdivFlag( bSubdiv, 5 - uiLog2TrSize );
5085  }
5086 
5087  assert( !pcCU->isIntra(uiAbsPartIdx) );
5088 
      // At relative depth 0 there is no parent level whose CBF could be consulted.
5089  const Bool bFirstCbfOfCU = uiCurrTrMode == 0;
5090 
      // Chroma CBFs: the loop starts at COMPONENT_Cb, so luma is not handled here.
5091  for (UInt ch=COMPONENT_Cb; ch<pcCU->getPic()->getNumberValidComponents(); ch++)
5092  {
5093  const ComponentID compIdInner=ComponentID(ch);
5094  if( bFirstCbfOfCU || rTu.ProcessingAllQuadrants(compIdInner) )
5095  {
      // Below depth 0 the CBF is coded only if the CBF one level up is set; the short-circuit
      // on bFirstCbfOfCU keeps (uiCurrTrMode - 1) from being evaluated at depth 0.
5096  if( bFirstCbfOfCU || pcCU->getCbf( uiAbsPartIdx, compIdInner, uiCurrTrMode - 1 ) )
5097  {
5098  m_pcEntropyCoder->encodeQtCbf( rTu, compIdInner, !bSubdiv );
5099  }
5100  }
5101  else
5102  {
      // Nothing is coded in this case; the CBF at this depth is expected to equal the one
      // stored for the level above.
5103  assert( pcCU->getCbf( uiAbsPartIdx, compIdInner, uiCurrTrMode ) == pcCU->getCbf( uiAbsPartIdx, compIdInner, uiCurrTrMode - 1 ) );
5104  }
5105  }
5106 
      // The luma CBF is coded only for a TU that is not subdivided further.
5107  if (!bSubdiv)
5108  {
5109  m_pcEntropyCoder->encodeQtCbf( rTu, COMPONENT_Y, true );
5110  }
5111  }
5112 
5113  if( !bSubdiv )
5114  {
5115  if (compID != MAX_NUM_COMPONENT) // we have already coded the CBFs, so now we code coefficients
5116  {
5117  if (rTu.ProcessComponentSection(compID))
5118  {
5119  if (isChroma(compID) && (pcCU->getCbf(uiAbsPartIdx, COMPONENT_Y, uiTrMode) != 0))
5120  {
      // NOTE(review): this branch is empty in this listing and the embedded numbering skips
      // 5121, so a statement appears to have been lost - restore it from the original file.
5122  }
5123 
      // Coefficients are coded only when the component's CBF at this transform depth is set.
5124  if (pcCU->getCbf(uiAbsPartIdx, compID, uiTrMode) != 0)
5125  {
      // The temporary coefficient buffer is selected by (max TU log2 size - current log2 size)
      // and then offset to this TU's position within the component.
5126  const UInt uiQTTempAccessLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
5127  TCoeff *pcCoeffCurr = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID);
5128  m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeffCurr, compID );
5129  }
5130  }
5131  }
5132  }
5133  else
5134  {
      // Subdivided TU: the CBF pass always descends; a component pass descends only when that
      // component's CBF is set at the current depth.
5135  if( compID==MAX_NUM_COMPONENT || pcCU->getCbf( uiAbsPartIdx, compID, uiCurrTrMode ) )
5136  {
5137  TComTURecurse tuRecurseChild(rTu, false);
5138  do
5139  {
5140  xEncodeInterResidualQT( compID, tuRecurseChild );
5141  } while (tuRecurseChild.nextSection(rTu));
5142  }
5143  }
5144 }
5145 
5146 
5147 
5148 
5149 Void TEncSearch::xSetInterResidualQTData( TComYuv* pcResi, Bool bSpatial, TComTU &rTu ) // TODO: turn this into two functions for bSpatial=true and false.
5150 {
5151  TComDataCU* pcCU=rTu.getCU();
5152  const UInt uiCurrTrMode=rTu.GetTransformDepthRel();
5153  const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
5154  assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) );
5155  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
5156  const TComSPS *sps=pcCU->getSlice()->getSPS();
5157 
5158  if( uiCurrTrMode == uiTrMode )
5159  {
5160  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
5161  const UInt uiQTTempAccessLayer = sps->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
5162 
5163  if( bSpatial )
5164  {
5165  // Data to be copied is in the spatial domain, i.e., inverse-transformed.
5166 
5167  for(UInt i=0; i<pcResi->getNumberValidComponents(); i++)
5168  {
5169  const ComponentID compID=ComponentID(i);
5170  if (rTu.ProcessComponentSection(compID))
5171  {
5172  const TComRectangle &rectCompTU(rTu.getRect(compID));
5173  m_pcQTTempTComYuv[uiQTTempAccessLayer].copyPartToPartComponentMxN ( compID, pcResi, rectCompTU );
5174  }
5175  }
5176  }
5177  else
5178  {
5179  for (UInt ch=0; ch < getNumberValidComponents(sps->getChromaFormatIdc()); ch++)
5180  {
5181  const ComponentID compID = ComponentID(ch);
5182  if (rTu.ProcessComponentSection(compID))
5183  {
5184  const TComRectangle &rectCompTU(rTu.getRect(compID));
5185  const UInt numCoeffInBlock = rectCompTU.width * rectCompTU.height;
5186  const UInt offset = rTu.getCoefficientOffset(compID);
5187  TCoeff* dest = pcCU->getCoeff(compID) + offset;
5188  const TCoeff* src = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + offset;
5189  ::memcpy( dest, src, sizeof(TCoeff)*numCoeffInBlock );
5190 
5191 #if ADAPTIVE_QP_SELECTION
5192  TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[compID][uiQTTempAccessLayer] + offset;
5193  TCoeff