mirror of
https://github.com/FreeRDP/FreeRDP.git
synced 2025-06-03 00:00:20 +00:00
commit
13cacd5010
@ -756,7 +756,10 @@ void h264_context_free(H264_CONTEXT* h264)
|
||||
if (h264)
|
||||
{
|
||||
if (h264->subsystem)
|
||||
{
|
||||
WINPR_ASSERT(h264->subsystem->Uninit);
|
||||
h264->subsystem->Uninit(h264);
|
||||
}
|
||||
|
||||
for (size_t x = 0; x < 3; x++)
|
||||
{
|
||||
|
@ -76,6 +76,7 @@ void primitives_init_YCoCg(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the YCoCg initializers on @p prims: the generic one first, then the
 * SSSE3 one, then the NEON one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_YCoCg_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SIMD initializers that
	 * follow only replace the entries they accelerate. */
	primitives_init_YCoCg(prims);

	primitives_init_YCoCg_ssse3(prims);
	primitives_init_YCoCg_neon(prims);
}
|
||||
|
@ -32,8 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_YCoCg_ssse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_YCoCg(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresentEx(PF_EX_SSSE3) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
@ -45,8 +43,6 @@ static inline void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prim
|
||||
FREERDP_LOCAL void primitives_init_YCoCg_neon_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_YCoCg(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
primitives_init_YCoCg_neon_int(prims);
|
||||
|
@ -341,7 +341,6 @@ static inline void general_YUV444ToRGB_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2],
|
||||
const BYTE* WINPR_RESTRICT pU[2],
|
||||
const BYTE* WINPR_RESTRICT pV[2], size_t nWidth)
|
||||
{
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
|
||||
|
||||
WINPR_ASSERT(nWidth % 2 == 0);
|
||||
@ -364,10 +363,7 @@ static inline void general_YUV444ToRGB_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2],
|
||||
const INT32 avgV = ((4 * v) - subV);
|
||||
v = CONDITIONAL_CLIP(avgV, WINPR_ASSERTING_INT_CAST(BYTE, v));
|
||||
}
|
||||
const BYTE r = YUV2R(y, u, v);
|
||||
const BYTE g = YUV2G(y, u, v);
|
||||
const BYTE b = YUV2B(y, u, v);
|
||||
pRGB[i] = writePixel(pRGB[i], formatSize, DstFormat, r, g, b, 0);
|
||||
pRGB[i] = writeYUVPixel(pRGB[i], DstFormat, y, u, v, writePixel);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -379,17 +375,16 @@ static inline void general_YUV444ToRGB_SINGLE_ROW(BYTE* WINPR_RESTRICT pRGB, UIN
|
||||
const BYTE* WINPR_RESTRICT pV, size_t nWidth)
|
||||
{
|
||||
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
WINPR_ASSERT(nWidth % 2 == 0);
|
||||
for (size_t x = 0; x < nWidth; x += 2)
|
||||
{
|
||||
for (size_t j = 0; j < 2; j++)
|
||||
{
|
||||
const BYTE r = YUV2R(pY[x + j], pU[x + j], pV[x + j]);
|
||||
const BYTE g = YUV2G(pY[x + j], pU[x + j], pV[x + j]);
|
||||
const BYTE b = YUV2B(pY[x + j], pU[x + j], pV[x + j]);
|
||||
pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
|
||||
const BYTE y = pY[x + j];
|
||||
const BYTE u = pU[x + j];
|
||||
const BYTE v = pV[x + j];
|
||||
pRGB = writeYUVPixel(pRGB, DstFormat, y, u, v, writePixel);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -438,8 +433,6 @@ static inline void general_YUV444ToBGRX_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2],
|
||||
const BYTE* WINPR_RESTRICT pU[2],
|
||||
const BYTE* WINPR_RESTRICT pV[2], size_t nWidth)
|
||||
{
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
WINPR_ASSERT(nWidth % 2 == 0);
|
||||
for (size_t x = 0; x < nWidth; x += 2)
|
||||
{
|
||||
@ -457,10 +450,10 @@ static inline void general_YUV444ToBGRX_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2],
|
||||
{
|
||||
for (size_t j = 0; j < 2; j++)
|
||||
{
|
||||
const BYTE r = YUV2R(pY[i][x + j], U[i][j], V[i][j]);
|
||||
const BYTE g = YUV2G(pY[i][x + j], U[i][j], V[i][j]);
|
||||
const BYTE b = YUV2B(pY[i][x + j], U[i][j], V[i][j]);
|
||||
pRGB[i] = writePixelBGRX(pRGB[i], formatSize, DstFormat, r, g, b, 0);
|
||||
const BYTE y = pY[i][x + j];
|
||||
const BYTE u = U[i][j];
|
||||
const BYTE v = V[i][j];
|
||||
pRGB[i] = writeYUVPixel(pRGB[i], DstFormat, y, u, v, writePixelBGRX);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -471,17 +464,15 @@ static inline void general_YUV444ToBGRX_SINGLE_ROW(BYTE* WINPR_RESTRICT pRGB, UI
|
||||
const BYTE* WINPR_RESTRICT pU,
|
||||
const BYTE* WINPR_RESTRICT pV, size_t nWidth)
|
||||
{
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
WINPR_ASSERT(nWidth % 2 == 0);
|
||||
for (size_t x = 0; x < nWidth; x += 2)
|
||||
{
|
||||
for (size_t j = 0; j < 2; j++)
|
||||
{
|
||||
const BYTE r = YUV2R(pY[x + j], pU[x + j], pV[x + j]);
|
||||
const BYTE g = YUV2G(pY[x + j], pU[x + j], pV[x + j]);
|
||||
const BYTE b = YUV2B(pY[x + j], pU[x + j], pV[x + j]);
|
||||
pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, r, g, b, 0);
|
||||
const BYTE Y = pY[x + j];
|
||||
const BYTE U = pU[x + j];
|
||||
const BYTE V = pV[x + j];
|
||||
pRGB = writeYUVPixel(pRGB, DstFormat, Y, U, V, writePixelBGRX);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -548,124 +539,90 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3
|
||||
UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* WINPR_RESTRICT roi)
|
||||
{
|
||||
UINT32 dstPad = 0;
|
||||
UINT32 srcPad[3];
|
||||
BYTE Y = 0;
|
||||
BYTE U = 0;
|
||||
BYTE V = 0;
|
||||
UINT32 halfWidth = 0;
|
||||
UINT32 halfHeight = 0;
|
||||
const BYTE* pY = NULL;
|
||||
const BYTE* pU = NULL;
|
||||
const BYTE* pV = NULL;
|
||||
BYTE* pRGB = pDst;
|
||||
UINT32 nWidth = 0;
|
||||
UINT32 nHeight = 0;
|
||||
UINT32 lastRow = 0;
|
||||
UINT32 lastCol = 0;
|
||||
WINPR_ASSERT(roi);
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
|
||||
pY = pSrc[0];
|
||||
pU = pSrc[1];
|
||||
pV = pSrc[2];
|
||||
lastCol = roi->width & 0x01;
|
||||
lastRow = roi->height & 0x01;
|
||||
nWidth = (roi->width + 1) & (uint32_t)~0x0001;
|
||||
nHeight = (roi->height + 1) & (uint32_t)~0x0001;
|
||||
halfWidth = nWidth / 2;
|
||||
halfHeight = nHeight / 2;
|
||||
srcPad[0] = (srcStep[0] - nWidth);
|
||||
srcPad[1] = (srcStep[1] - halfWidth);
|
||||
srcPad[2] = (srcStep[2] - halfWidth);
|
||||
dstPad = (dstStep - (nWidth * 4));
|
||||
UINT32 lastCol = roi->width & 0x01;
|
||||
UINT32 lastRow = roi->height & 0x01;
|
||||
const UINT32 nWidth = (roi->width + 1) & (uint32_t)~0x0001;
|
||||
const UINT32 nHeight = (roi->height + 1) & (uint32_t)~0x0001;
|
||||
const UINT32 halfWidth = nWidth / 2;
|
||||
const UINT32 halfHeight = nHeight / 2;
|
||||
|
||||
for (UINT32 y = 0; y < halfHeight;)
|
||||
for (UINT32 y = 0; y < halfHeight; y++)
|
||||
{
|
||||
if (++y == halfHeight)
|
||||
const BYTE* pY = &pSrc[0][2ULL * srcStep[0] * y];
|
||||
|
||||
if (y + 1 == halfHeight)
|
||||
lastRow <<= 1;
|
||||
|
||||
for (UINT32 x = 0; x < halfWidth;)
|
||||
{
|
||||
BYTE r = 0;
|
||||
BYTE g = 0;
|
||||
BYTE b = 0;
|
||||
|
||||
if (++x == halfWidth)
|
||||
lastCol <<= 1;
|
||||
|
||||
U = *pU++;
|
||||
V = *pV++;
|
||||
/* 1st pixel */
|
||||
Y = *pY++;
|
||||
r = YUV2R(Y, U, V);
|
||||
g = YUV2G(Y, U, V);
|
||||
b = YUV2B(Y, U, V);
|
||||
pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
|
||||
|
||||
/* 2nd pixel */
|
||||
if (!(lastCol & 0x02))
|
||||
const BYTE* pU = &pSrc[1][1ULL * srcStep[1] * y];
|
||||
const BYTE* pV = &pSrc[2][1ULL * srcStep[2] * y];
|
||||
BYTE* pRGBeven = &pDst[2ULL * y * dstStep];
|
||||
for (UINT32 x = 0; x < halfWidth;)
|
||||
{
|
||||
Y = *pY++;
|
||||
r = YUV2R(Y, U, V);
|
||||
g = YUV2G(Y, U, V);
|
||||
b = YUV2B(Y, U, V);
|
||||
pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
pY++;
|
||||
pRGB += formatSize;
|
||||
lastCol >>= 1;
|
||||
if (++x == halfWidth)
|
||||
lastCol <<= 1;
|
||||
|
||||
const BYTE U = *pU++;
|
||||
const BYTE V = *pV++;
|
||||
/* 1st pixel */
|
||||
{
|
||||
const BYTE Y = *pY++;
|
||||
pRGBeven = writeYUVPixel(pRGBeven, DstFormat, Y, U, V, writePixel);
|
||||
}
|
||||
|
||||
/* 2nd pixel */
|
||||
if (!(lastCol & 0x02))
|
||||
{
|
||||
const BYTE Y1 = *pY++;
|
||||
pRGBeven = writeYUVPixel(pRGBeven, DstFormat, Y1, U, V, writePixel);
|
||||
}
|
||||
else
|
||||
{
|
||||
pY++;
|
||||
pRGBeven += formatSize;
|
||||
lastCol >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pY += srcPad[0];
|
||||
pU -= halfWidth;
|
||||
pV -= halfWidth;
|
||||
pRGB += dstPad;
|
||||
|
||||
if (lastRow & 0x02)
|
||||
break;
|
||||
|
||||
for (UINT32 x = 0; x < halfWidth;)
|
||||
{
|
||||
BYTE r = 0;
|
||||
BYTE g = 0;
|
||||
BYTE b = 0;
|
||||
const BYTE* pU = &pSrc[1][1ULL * srcStep[1] * y];
|
||||
const BYTE* pV = &pSrc[2][1ULL * srcStep[2] * y];
|
||||
BYTE* pRGBodd = &pDst[(2ULL * y + 1ULL) * dstStep];
|
||||
|
||||
if (++x == halfWidth)
|
||||
lastCol <<= 1;
|
||||
|
||||
U = *pU++;
|
||||
V = *pV++;
|
||||
/* 3rd pixel */
|
||||
Y = *pY++;
|
||||
r = YUV2R(Y, U, V);
|
||||
g = YUV2G(Y, U, V);
|
||||
b = YUV2B(Y, U, V);
|
||||
pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
|
||||
|
||||
/* 4th pixel */
|
||||
if (!(lastCol & 0x02))
|
||||
for (UINT32 x = 0; x < halfWidth;)
|
||||
{
|
||||
Y = *pY++;
|
||||
r = YUV2R(Y, U, V);
|
||||
g = YUV2G(Y, U, V);
|
||||
b = YUV2B(Y, U, V);
|
||||
pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
pY++;
|
||||
pRGB += formatSize;
|
||||
lastCol >>= 1;
|
||||
if (++x == halfWidth)
|
||||
lastCol <<= 1;
|
||||
|
||||
const BYTE U = *pU++;
|
||||
const BYTE V = *pV++;
|
||||
/* 3rd pixel */
|
||||
{
|
||||
const BYTE Y = *pY++;
|
||||
pRGBodd = writeYUVPixel(pRGBodd, DstFormat, Y, U, V, writePixel);
|
||||
}
|
||||
|
||||
/* 4th pixel */
|
||||
if (!(lastCol & 0x02))
|
||||
{
|
||||
const BYTE Y1 = *pY++;
|
||||
pRGBodd = writeYUVPixel(pRGBodd, DstFormat, Y1, U, V, writePixel);
|
||||
}
|
||||
else
|
||||
{
|
||||
pY++;
|
||||
pRGBodd += formatSize;
|
||||
lastCol >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pY += srcPad[0];
|
||||
pU += srcPad[1];
|
||||
pV += srcPad[2];
|
||||
pRGB += dstPad;
|
||||
}
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
@ -2351,6 +2308,7 @@ void primitives_init_YUV(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the YUV initializers on @p prims: the generic one first, then the
 * SSE4.1 one, then the NEON one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_YUV_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SIMD initializers that
	 * follow only replace the entries they accelerate. */
	primitives_init_YUV(prims);

	primitives_init_YUV_sse41(prims);
	primitives_init_YUV_neon(prims);
}
|
||||
|
@ -32,8 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_YUV_sse41_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_YUV(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresentEx(PF_EX_SSE41) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
@ -44,8 +42,6 @@ static inline void primitives_init_YUV_sse41(primitives_t* WINPR_RESTRICT prims)
|
||||
FREERDP_LOCAL void primitives_init_YUV_neon_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_YUV_neon(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_YUV(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresentEx(PF_EX_SSE41))
|
||||
return;
|
||||
|
||||
|
@ -78,5 +78,6 @@ void primitives_init_add(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the "add" initializers on @p prims: the generic one first, then the
 * SSE3 one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_add_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SSE3 initializer only
	 * replaces the entries it accelerates. */
	primitives_init_add(prims);

	primitives_init_add_sse3(prims);
}
|
||||
|
@ -32,8 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_add_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_add(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
|
||||
return;
|
||||
|
@ -93,5 +93,6 @@ void primitives_init_alphaComp(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the alpha-composition initializers on @p prims: the generic one first,
 * then the SSE3 one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_alphaComp_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SSE3 initializer only
	 * replaces the entries it accelerates. */
	primitives_init_alphaComp(prims);

	primitives_init_alphaComp_sse3(prims);
}
|
||||
|
@ -32,8 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_alphaComp_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_alphaComp(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
|
||||
return;
|
||||
|
@ -61,5 +61,6 @@ void primitives_init_andor(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the and/or initializers on @p prims: the generic one first, then the
 * SSE3 one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_andor_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SSE3 initializer only
	 * replaces the entries it accelerates. */
	primitives_init_andor(prims);

	primitives_init_andor_sse3(prims);
}
|
||||
|
@ -32,8 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_andor_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_andor(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
@ -570,6 +570,7 @@ void primitives_init_colors(primitives_t* WINPR_RESTRICT prims)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/**
 * Run the color-conversion initializers on @p prims: the generic one first,
 * then the SSE2 one, then the NEON one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_colors_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SIMD initializers that
	 * follow only replace the entries they accelerate. */
	primitives_init_colors(prims);

	primitives_init_colors_sse2(prims);
	primitives_init_colors_neon(prims);
}
|
||||
|
@ -32,8 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_colors_sse2(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_colors(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
@ -44,7 +42,6 @@ static inline void primitives_init_colors_sse2(primitives_t* WINPR_RESTRICT prim
|
||||
FREERDP_LOCAL void primitives_init_colors_neon_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_colors_neon(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_colors(prims);
|
||||
if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
||||
|
@ -424,6 +424,7 @@ void primitives_init_copy(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
void primitives_init_copy_opt(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_copy(prims);
|
||||
primitives_init_copy_sse41(prims);
|
||||
#if defined(WITH_AVX2)
|
||||
primitives_init_copy_avx2(prims);
|
||||
|
@ -286,6 +286,17 @@ static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
|
||||
return WINPR_ASSERTING_INT_CAST(BYTE, val);
|
||||
}
|
||||
|
||||
static inline BYTE* writeYUVPixel(BYTE* dst, UINT32 DstFormat, INT32 y, INT32 u, INT32 v,
|
||||
fkt_writePixel fkt)
|
||||
{
|
||||
WINPR_ASSERT(fkt);
|
||||
const BYTE r = YUV2R(y, u, v);
|
||||
const BYTE g = YUV2G(y, u, v);
|
||||
const BYTE b = YUV2B(y, u, v);
|
||||
const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
return fkt(dst, formatSize, DstFormat, r, g, b, 0);
|
||||
}
|
||||
|
||||
FREERDP_LOCAL void general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(
|
||||
size_t offset, const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd,
|
||||
BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
|
||||
|
@ -128,5 +128,6 @@ void primitives_init_set(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the "set" initializers on @p prims: the generic one first, then the
 * SSE2 one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_set_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SSE2 initializer only
	 * replaces the entries it accelerates. */
	primitives_init_set(prims);

	primitives_init_set_sse2(prims);
}
|
||||
|
@ -32,7 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_set_sse2_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_set(prims);
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
@ -145,5 +145,6 @@ void primitives_init_shift(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the shift initializers on @p prims: the generic one first, then the
 * SSE3 one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_shift_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SSE3 initializer only
	 * replaces the entries it accelerates. */
	primitives_init_shift(prims);

	primitives_init_shift_sse3(prims);
}
|
||||
|
@ -31,8 +31,6 @@
|
||||
FREERDP_LOCAL void primitives_init_shift_sse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_shift(prims);
|
||||
|
||||
if (!IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
@ -45,5 +45,6 @@ void primitives_init_sign(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
/**
 * Run the sign initializers on @p prims: the generic one first, then the
 * SSSE3 one.
 *
 * @param prims primitives table handed unchanged to each initializer
 */
void primitives_init_sign_opt(primitives_t* WINPR_RESTRICT prims)
{
	/* Generic baseline goes first; presumably the SSSE3 initializer only
	 * replaces the entries it accelerates. */
	primitives_init_sign(prims);

	primitives_init_sign_ssse3(prims);
}
|
||||
|
@ -32,7 +32,6 @@
|
||||
FREERDP_LOCAL void primitives_init_sign_ssse3_int(primitives_t* WINPR_RESTRICT prims);
|
||||
static inline void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims)
|
||||
{
|
||||
primitives_init_sign(prims);
|
||||
if (!IsProcessorFeaturePresentEx(PF_EX_SSSE3) ||
|
||||
!IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return;
|
||||
|
@ -160,10 +160,7 @@ static inline pstatus_t sse41_YUV420ToRGB_BGRX(const BYTE* WINPR_RESTRICT pSrc[]
|
||||
const BYTE Y = *YData++;
|
||||
const BYTE U = *UData;
|
||||
const BYTE V = *VData;
|
||||
const BYTE r = YUV2R(Y, U, V);
|
||||
const BYTE g = YUV2G(Y, U, V);
|
||||
const BYTE b = YUV2B(Y, U, V);
|
||||
dst = (__m128i*)writePixelBGRX((BYTE*)dst, 4, PIXEL_FORMAT_BGRX32, r, g, b, 0);
|
||||
dst = (__m128i*)writeYUVPixel((BYTE*)dst, PIXEL_FORMAT_BGRX32, Y, U, V, writePixelBGRX);
|
||||
|
||||
if (x % 2)
|
||||
{
|
||||
@ -221,10 +218,7 @@ static inline void BGRX_fillRGB(size_t offset, BYTE* WINPR_RESTRICT pRGB[2],
|
||||
V = CONDITIONAL_CLIP(avgV, pV[0][offset]);
|
||||
}
|
||||
|
||||
const BYTE r = YUV2R(Y, U, V);
|
||||
const BYTE g = YUV2G(Y, U, V);
|
||||
const BYTE b = YUV2B(Y, U, V);
|
||||
writePixelBGRX(&pRGB[i][(j + offset) * bpp], bpp, DstFormat, r, g, b, 0);
|
||||
writeYUVPixel(&pRGB[i][(j + offset) * bpp], DstFormat, Y, U, V, writePixelBGRX);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -455,7 +449,6 @@ static inline void BGRX_fillRGB_single(size_t offset, BYTE* WINPR_RESTRICT pRGB,
|
||||
WINPR_ASSERT(pU);
|
||||
WINPR_ASSERT(pV);
|
||||
|
||||
const UINT32 DstFormat = PIXEL_FORMAT_BGRX32;
|
||||
const UINT32 bpp = 4;
|
||||
|
||||
for (size_t j = 0; j < 2; j++)
|
||||
@ -464,10 +457,7 @@ static inline void BGRX_fillRGB_single(size_t offset, BYTE* WINPR_RESTRICT pRGB,
|
||||
BYTE U = pU[offset + j];
|
||||
BYTE V = pV[offset + j];
|
||||
|
||||
const BYTE r = YUV2R(Y, U, V);
|
||||
const BYTE g = YUV2G(Y, U, V);
|
||||
const BYTE b = YUV2B(Y, U, V);
|
||||
writePixelBGRX(&pRGB[(j + offset) * bpp], bpp, DstFormat, r, g, b, 0);
|
||||
writeYUVPixel(&pRGB[(j + offset) * bpp], PIXEL_FORMAT_BGRX32, Y, U, V, writePixelBGRX);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,110 +97,6 @@ static inline void mm_prefetch_buffer(const void* WINPR_RESTRICT buffer, size_t
|
||||
}
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static pstatus_t
|
||||
sse2_yCbCrToRGB_16s16s_P3P3(const INT16* WINPR_RESTRICT pSrc[3], int srcStep,
|
||||
INT16* WINPR_RESTRICT pDst[3], int dstStep,
|
||||
const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
|
||||
{
|
||||
if (((ULONG_PTR)(pSrc[0]) & 0x0f) || ((ULONG_PTR)(pSrc[1]) & 0x0f) ||
|
||||
((ULONG_PTR)(pSrc[2]) & 0x0f) || ((ULONG_PTR)(pDst[0]) & 0x0f) ||
|
||||
((ULONG_PTR)(pDst[1]) & 0x0f) || ((ULONG_PTR)(pDst[2]) & 0x0f) || (roi->width & 0x07) ||
|
||||
(srcStep & 127) || (dstStep & 127))
|
||||
{
|
||||
/* We can't maintain 16-byte alignment. */
|
||||
return generic->yCbCrToRGB_16s16s_P3P3(pSrc, srcStep, pDst, dstStep, roi);
|
||||
}
|
||||
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max = _mm_set1_epi16(255);
|
||||
const __m128i* y_buf = (const __m128i*)(pSrc[0]);
|
||||
const __m128i* cb_buf = (const __m128i*)(pSrc[1]);
|
||||
const __m128i* cr_buf = (const __m128i*)(pSrc[2]);
|
||||
__m128i* r_buf = (__m128i*)(pDst[0]);
|
||||
__m128i* g_buf = (__m128i*)(pDst[1]);
|
||||
__m128i* b_buf = (__m128i*)(pDst[2]);
|
||||
__m128i r_cr =
|
||||
_mm_set1_epi16(WINPR_ASSERTING_INT_CAST(int16_t, ycbcr_table[14][0])); /* 1.403 << 14 */
|
||||
__m128i g_cb =
|
||||
_mm_set1_epi16(WINPR_ASSERTING_INT_CAST(int16_t, ycbcr_table[14][1])); /* -0.344 << 14 */
|
||||
__m128i g_cr =
|
||||
_mm_set1_epi16(WINPR_ASSERTING_INT_CAST(int16_t, ycbcr_table[14][2])); /* -0.714 << 14 */
|
||||
__m128i b_cb =
|
||||
_mm_set1_epi16(WINPR_ASSERTING_INT_CAST(int16_t, ycbcr_table[14][3])); /* 1.770 << 14 */
|
||||
__m128i c4096 = _mm_set1_epi16(4096);
|
||||
const size_t srcbump = WINPR_ASSERTING_INT_CAST(size_t, srcStep) / sizeof(__m128i);
|
||||
const size_t dstbump = WINPR_ASSERTING_INT_CAST(size_t, dstStep) / sizeof(__m128i);
|
||||
|
||||
mm_prefetch_buffer(y_buf, roi->width, (size_t)srcStep, roi->height);
|
||||
mm_prefetch_buffer(cb_buf, roi->width, (size_t)srcStep, roi->height);
|
||||
mm_prefetch_buffer(cr_buf, roi->width, (size_t)srcStep, roi->height);
|
||||
|
||||
const size_t imax = roi->width * sizeof(INT16) / sizeof(__m128i);
|
||||
|
||||
for (UINT32 yp = 0; yp < roi->height; ++yp)
|
||||
{
|
||||
for (size_t i = 0; i < imax; i++)
|
||||
{
|
||||
/* In order to use SSE2 signed 16-bit integer multiplication
|
||||
* we need to convert the floating point factors to signed int
|
||||
* without losing information.
|
||||
* The result of this multiplication is 32 bit and we have two
|
||||
* SSE instructions that return either the hi or lo word.
|
||||
* Thus we will multiply the factors by the highest possible 2^n,
|
||||
* take the upper 16 bits of the signed 32-bit result
|
||||
* (_mm_mulhi_epi16) and correct this result by multiplying
|
||||
* it by 2^(16-n).
|
||||
*
|
||||
* For the given factors in the conversion matrix the best
|
||||
* possible n is 14.
|
||||
*
|
||||
* Example for calculating r:
|
||||
* r = (y>>5) + 128 + (cr*1.403)>>5 // our base formula
|
||||
* r = (y>>5) + 128 + (HIWORD(cr*(1.403<<14)<<2))>>5 // see above
|
||||
* r = (y+4096)>>5 + (HIWORD(cr*22986)<<2)>>5 // simplification
|
||||
* r = ((y+4096)>>2 + HIWORD(cr*22986)) >> 3
|
||||
*/
|
||||
/* y = (y_r_buf[i] + 4096) >> 2 */
|
||||
__m128i y = LOAD_SI128(y_buf + i);
|
||||
y = _mm_add_epi16(y, c4096);
|
||||
y = _mm_srai_epi16(y, 2);
|
||||
/* cb = cb_g_buf[i]; */
|
||||
__m128i cb = LOAD_SI128(cb_buf + i);
|
||||
/* cr = cr_b_buf[i]; */
|
||||
__m128i cr = LOAD_SI128(cr_buf + i);
|
||||
/* (y + HIWORD(cr*22986)) >> 3 */
|
||||
__m128i r = _mm_add_epi16(y, _mm_mulhi_epi16(cr, r_cr));
|
||||
r = _mm_srai_epi16(r, 3);
|
||||
/* r_buf[i] = CLIP(r); */
|
||||
mm_between_epi16(r, zero, max);
|
||||
STORE_SI128(r_buf + i, r);
|
||||
/* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */
|
||||
__m128i g = _mm_add_epi16(y, _mm_mulhi_epi16(cb, g_cb));
|
||||
g = _mm_add_epi16(g, _mm_mulhi_epi16(cr, g_cr));
|
||||
g = _mm_srai_epi16(g, 3);
|
||||
/* g_buf[i] = CLIP(g); */
|
||||
mm_between_epi16(g, zero, max);
|
||||
STORE_SI128(g_buf + i, g);
|
||||
/* (y + HIWORD(cb*28999)) >> 3 */
|
||||
__m128i b = _mm_add_epi16(y, _mm_mulhi_epi16(cb, b_cb));
|
||||
b = _mm_srai_epi16(b, 3);
|
||||
/* b_buf[i] = CLIP(b); */
|
||||
mm_between_epi16(b, zero, max);
|
||||
STORE_SI128(b_buf + i, b);
|
||||
}
|
||||
|
||||
y_buf += srcbump;
|
||||
cb_buf += srcbump;
|
||||
cr_buf += srcbump;
|
||||
r_buf += dstbump;
|
||||
g_buf += dstbump;
|
||||
b_buf += dstbump;
|
||||
}
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static pstatus_t
|
||||
sse2_yCbCrToRGB_16s8u_P3AC4R_BGRX(const INT16* WINPR_RESTRICT pSrc[3],
|
||||
@ -1150,7 +1046,6 @@ void primitives_init_colors_sse2_int(primitives_t* WINPR_RESTRICT prims)
|
||||
|
||||
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
|
||||
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
|
||||
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
|
||||
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
|
||||
|
||||
|
@ -1001,10 +1001,8 @@ static BOOL yuv444_to_rgb(BYTE* rgb, size_t stride, const BYTE* yuv[3], const UI
|
||||
const BYTE Y = yline[0][x];
|
||||
const BYTE U = yline[1][x];
|
||||
const BYTE V = yline[2][x];
|
||||
const BYTE r = YUV2R(Y, U, V);
|
||||
const BYTE g = YUV2G(Y, U, V);
|
||||
const BYTE b = YUV2B(Y, U, V);
|
||||
writePixelBGRX(&line[x * 4], 4, PIXEL_FORMAT_BGRX32, r, g, b, 0xFF);
|
||||
|
||||
writeYUVPixel(&line[x * 4], PIXEL_FORMAT_BGRX32, Y, U, V, writePixelBGRX);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user