FreeRDP/channels/rdpecam/client/encoding.c

/**
* FreeRDP: A Remote Desktop Protocol Implementation
* MS-RDPECAM Implementation, Video Encoding
*
* Copyright 2024 Oleg Turovski <oleg2104@hotmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <winpr/assert.h>
#include <winpr/winpr.h>
#include "camera.h"
#define TAG CHANNELS_TAG("rdpecam-video.client")
#if defined(WITH_INPUT_FORMAT_H264)
/*
 * Demux an H264 frame from an MJPG container.
 *
 * args:
 * srcData - pointer to buffer with H264 muxed in an MJPG container
 * srcSize - buffer size
 * h264_data - pointer to the H264 output buffer
 * h264_max_size - maximum size allowed by the h264_data buffer
 *
 * Credits:
 * guvcview http://guvcview.sourceforge.net
 * Paulo Assis <pj.assis@gmail.com>
 *
 * See Figure 5 "Payload Size" in USB_Video_Payload_H.264_1.0.pdf
 * for format details.
 *
 * @return size of the demuxed data copied into h264_data, 0 on error
 */
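/* First APP4 segment layout, as parsed below:
 *
 * 0xFF 0xE4 - APP4 marker (2 bytes)
 * length - segment length, big endian (2 bytes)
 * header - nominally 22 bytes; its actual length is stored
 * little endian in header bytes 2-3
 * payload size - little endian (4 bytes)
 * payload - first chunk of the H264 bitstream
 *
 * Each following segment is an APP4 marker, a 2-byte big endian length,
 * and another chunk of the bitstream.
 */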
static size_t demux_uvcH264(const BYTE* srcData, size_t srcSize, BYTE* h264_data,
size_t h264_max_size)
{
WINPR_ASSERT(h264_data);
WINPR_ASSERT(srcData);
if (srcSize < 30)
{
WLog_ERR(TAG, "Expected srcSize >= 30, got %" PRIuz, srcSize);
return 0;
}
const uint8_t* spl = NULL;
uint8_t* ph264 = h264_data;
/* search for 1st APP4 marker
* (30 = 2 APP4 marker + 2 length + 22 header + 4 payload size)
*/
for (const uint8_t* sp = srcData; sp < srcData + srcSize - 30; sp++)
{
if (sp[0] == 0xFF && sp[1] == 0xE4)
{
spl = sp + 2; /* exclude APP4 marker */
break;
}
}
if (spl == NULL)
{
WLog_ERR(TAG, "Expected 1st APP4 marker but none found");
return 0;
}
if (spl > srcData + srcSize - 4)
{
WLog_ERR(TAG, "Payload + Header size bigger than srcData buffer");
return 0;
}
/* 1st segment length in big endian; it covers the first payload chunk,
 * the header, and 6 more bytes (2 length field + 4 payload size field)
 */
uint16_t length = (uint16_t)(spl[0] << 8) & UINT16_MAX;
length |= (uint16_t)spl[1];
spl += 2; /* header */
/* header length in little endian at offset 2 */
uint16_t header_length = (uint16_t)spl[2];
header_length |= (uint16_t)spl[3] << 8;
spl += header_length;
if (spl + 4 > srcData + srcSize)
{
WLog_ERR(TAG, "Header size bigger than srcData buffer");
return 0;
}
/* payload size in little endian */
uint32_t payload_size = (uint32_t)spl[0] << 0;
payload_size |= (uint32_t)spl[1] << 8;
payload_size |= (uint32_t)spl[2] << 16;
payload_size |= (uint32_t)spl[3] << 24;
if (payload_size > h264_max_size)
{
WLog_ERR(TAG, "Payload size bigger than h264_data buffer");
return 0;
}
spl += 4; /* payload start */
const uint8_t* epl = spl + payload_size; /* payload end */
if (epl > srcData + srcSize)
{
WLog_ERR(TAG, "Payload size bigger than srcData buffer");
return 0;
}
if (length < header_length + 6)
{
WLog_ERR(TAG, "Expected 1st APP4 length >= header length + 6, got %" PRIu16, length);
return 0;
}
length -= header_length + 6;
if (spl + length > epl)
{
WLog_ERR(TAG, "1st APP4 segment bigger than payload");
return 0;
}
/* copy 1st segment to h264 buffer */
memcpy(ph264, spl, length);
ph264 += length;
spl += length;
/* copy other segments */
while (epl > spl + 4)
{
if (spl[0] != 0xFF || spl[1] != 0xE4)
{
WLog_ERR(TAG, "Expected 2nd+ APP4 marker but none found");
const intptr_t diff = ph264 - h264_data;
return WINPR_ASSERTING_INT_CAST(size_t, diff);
}
/* 2nd+ segment length in big endian */
length = (uint16_t)(spl[2] << 8) & UINT16_MAX;
length |= (uint16_t)spl[3];
if (length < 2)
{
WLog_ERR(TAG, "Expected 2nd+ APP4 length >= 2 but have %" PRIu16, length);
return 0;
}
length -= 2;
spl += 4; /* APP4 marker + length */
if (spl + length > epl)
{
WLog_ERR(TAG, "2nd+ APP4 segment bigger than payload");
return 0;
}
/* copy segment to h264 buffer */
memcpy(ph264, spl, length);
ph264 += length;
spl += length;
}
const intptr_t diff = ph264 - h264_data;
return WINPR_ASSERTING_INT_CAST(size_t, diff);
}
#endif
/**
* Function description
*
* @return bitrate in bps
*/
UINT32 h264_get_max_bitrate(UINT32 height)
{
static struct Bitrates
{
UINT32 height;
UINT32 bitrate; /* kbps */
} bitrates[] = {
/* source: https://livekit.io/webrtc/bitrate-guide (webcam streaming)
*
* sorted by height in descending order
*/
{ 1080, 2700 }, { 720, 1250 }, { 480, 700 }, { 360, 400 },
{ 240, 170 }, { 180, 140 }, { 0, 100 },
};
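/* example: a 600-line stream matches the 480 row, capping at 700 kbps = 700000 bps */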
const size_t nBitrates = ARRAYSIZE(bitrates);
for (size_t i = 0; i < nBitrates; i++)
{
if (height >= bitrates[i].height)
{
UINT32 bitrate = bitrates[i].bitrate;
WLog_DBG(TAG, "Setting h264 max bitrate: %" PRIu32 " kbps", bitrate);
return bitrate * 1000;
}
}
WINPR_ASSERT(FALSE);
return 0;
}
/**
* Function description
*
* @return enum AVPixelFormat value
*/
static enum AVPixelFormat ecamToAVPixFormat(CAM_MEDIA_FORMAT ecamFormat)
{
switch (ecamFormat)
{
case CAM_MEDIA_FORMAT_YUY2:
return AV_PIX_FMT_YUYV422;
case CAM_MEDIA_FORMAT_NV12:
return AV_PIX_FMT_NV12;
case CAM_MEDIA_FORMAT_I420:
return AV_PIX_FMT_YUV420P;
case CAM_MEDIA_FORMAT_RGB24:
return AV_PIX_FMT_RGB24;
case CAM_MEDIA_FORMAT_RGB32:
return AV_PIX_FMT_RGB32;
default:
WLog_ERR(TAG, "Unsupported ecamFormat %d", ecamFormat);
return AV_PIX_FMT_NONE;
}
}
/**
* Function description
* initialize libswscale
*
* @return success/failure
*/
static BOOL ecam_init_sws_context(CameraDeviceStream* stream, enum AVPixelFormat pixFormat)
{
WINPR_ASSERT(stream);
if (stream->sws)
return TRUE;
/* replace the deprecated full-range JPEG (YUVJ*) pixel formats that the MJPEG decoder still produces */
switch (pixFormat)
{
case AV_PIX_FMT_YUVJ411P:
pixFormat = AV_PIX_FMT_YUV411P;
break;
case AV_PIX_FMT_YUVJ420P:
pixFormat = AV_PIX_FMT_YUV420P;
break;
case AV_PIX_FMT_YUVJ422P:
pixFormat = AV_PIX_FMT_YUV422P;
break;
case AV_PIX_FMT_YUVJ440P:
pixFormat = AV_PIX_FMT_YUV440P;
break;
case AV_PIX_FMT_YUVJ444P:
pixFormat = AV_PIX_FMT_YUV444P;
break;
default:
break;
}
const int width = (int)stream->currMediaType.Width;
const int height = (int)stream->currMediaType.Height;
const enum AVPixelFormat outPixFormat =
h264_context_get_option(stream->h264, H264_CONTEXT_OPTION_HW_ACCEL) ? AV_PIX_FMT_NV12
: AV_PIX_FMT_YUV420P;
stream->sws =
sws_getContext(width, height, pixFormat, width, height, outPixFormat, 0, NULL, NULL, NULL);
if (!stream->sws)
{
WLog_ERR(TAG, "sws_getContext failed");
return FALSE;
}
return TRUE;
}
/**
* Function description
*
* @return success/failure
*/
static BOOL ecam_encoder_compress_h264(CameraDeviceStream* stream, const BYTE* srcData,
size_t srcSize, BYTE** ppDstData, size_t* pDstSize)
{
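/* Three input paths, all ending in an H264 bitstream:
 * 1. MJPG_H264: the container already carries H264, demux and return it
 * 2. MJPG: decode to a raw frame with the FFmpeg MJPEG decoder, then
 * convert with libswscale and encode
 * 3. raw formats (YUY2/NV12/I420/RGB24/RGB32): wrap srcData in plane
 * pointers, then convert and encode
 */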
WINPR_ASSERT(stream);
WINPR_ASSERT(srcData);
WINPR_ASSERT(ppDstData);
WINPR_ASSERT(pDstSize);
UINT32 dstSize = 0;
BYTE* srcSlice[4] = { 0 };
int srcLineSizes[4] = { 0 };
BYTE* yuvData[3] = { 0 };
UINT32 yuvLineSizes[3] = { 0 };
prim_size_t size = { stream->currMediaType.Width, stream->currMediaType.Height };
CAM_MEDIA_FORMAT inputFormat = streamInputFormat(stream);
enum AVPixelFormat pixFormat = AV_PIX_FMT_NONE;
#if defined(WITH_INPUT_FORMAT_H264)
if (inputFormat == CAM_MEDIA_FORMAT_MJPG_H264)
{
const size_t rc =
demux_uvcH264(srcData, srcSize, stream->h264Frame, stream->h264FrameMaxSize);
dstSize = WINPR_ASSERTING_INT_CAST(uint32_t, rc);
*ppDstData = stream->h264Frame;
*pDstSize = dstSize;
return dstSize > 0;
}
else
#endif
#if defined(WITH_INPUT_FORMAT_MJPG)
if (inputFormat == CAM_MEDIA_FORMAT_MJPG)
{
stream->avInputPkt->data = WINPR_CAST_CONST_PTR_AWAY(srcData, uint8_t*);
WINPR_ASSERT(srcSize <= INT32_MAX);
stream->avInputPkt->size = (int)srcSize;
if (avcodec_send_packet(stream->avContext, stream->avInputPkt) < 0)
{
WLog_ERR(TAG, "avcodec_send_packet failed");
return FALSE;
}
if (avcodec_receive_frame(stream->avContext, stream->avOutFrame) < 0)
{
WLog_ERR(TAG, "avcodec_receive_frame failed");
return FALSE;
}
for (size_t i = 0; i < 4; i++)
{
srcSlice[i] = stream->avOutFrame->data[i];
srcLineSizes[i] = stream->avOutFrame->linesize[i];
}
/* get pixFormat produced by MJPEG decoder */
pixFormat = stream->avContext->pix_fmt;
}
else
#endif
{
pixFormat = ecamToAVPixFormat(inputFormat);
if (av_image_fill_linesizes(srcLineSizes, pixFormat, (int)size.width) < 0)
{
WLog_ERR(TAG, "av_image_fill_linesizes failed");
return FALSE;
}
if (av_image_fill_pointers(srcSlice, pixFormat, (int)size.height,
WINPR_CAST_CONST_PTR_AWAY(srcData, BYTE*), srcLineSizes) < 0)
{
WLog_ERR(TAG, "av_image_fill_pointers failed");
return FALSE;
}
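/* srcSlice/srcLineSizes now describe the planes inside srcData itself; no copy is made */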
}
/* get buffers for YUV420P or NV12 */
if (h264_get_yuv_buffer(stream->h264, 0, size.width, size.height, yuvData, yuvLineSizes) < 0)
return FALSE;
/* convert from source format to YUV420P or NV12 */
if (!ecam_init_sws_context(stream, pixFormat))
return FALSE;
const BYTE* cSrcSlice[4] = { srcSlice[0], srcSlice[1], srcSlice[2], srcSlice[3] };
if (sws_scale(stream->sws, cSrcSlice, srcLineSizes, 0, (int)size.height, yuvData,
(int*)yuvLineSizes) <= 0)
return FALSE;
/* encode from YUV420P or NV12 to H264 */
if (h264_compress(stream->h264, ppDstData, &dstSize) < 0)
return FALSE;
*pDstSize = dstSize;
return TRUE;
}
/**
* Function description
*
*/
static void ecam_encoder_context_free_h264(CameraDeviceStream* stream)
{
WINPR_ASSERT(stream);
if (stream->sws)
{
sws_freeContext(stream->sws);
stream->sws = NULL;
}
#if defined(WITH_INPUT_FORMAT_MJPG)
if (stream->avOutFrame)
av_frame_free(&stream->avOutFrame); /* sets to NULL */
if (stream->avInputPkt)
{
stream->avInputPkt->data = NULL;
stream->avInputPkt->size = 0;
av_packet_free(&stream->avInputPkt); /* sets to NULL */
}
if (stream->avContext)
avcodec_free_context(&stream->avContext); /* sets to NULL */
#endif
#if defined(WITH_INPUT_FORMAT_H264)
if (stream->h264Frame)
{
free(stream->h264Frame);
stream->h264Frame = NULL;
}
#endif
if (stream->h264)
{
h264_context_free(stream->h264);
stream->h264 = NULL;
}
}
#if defined(WITH_INPUT_FORMAT_MJPG)
/**
* Function description
*
* @return success/failure
*/
static BOOL ecam_init_mjpeg_decoder(CameraDeviceStream* stream)
{
WINPR_ASSERT(stream);
const AVCodec* avcodec = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
if (!avcodec)
{
WLog_ERR(TAG, "avcodec_find_decoder failed to find MJPEG codec");
return FALSE;
}
stream->avContext = avcodec_alloc_context3(avcodec);
if (!stream->avContext)
{
WLog_ERR(TAG, "avcodec_alloc_context3 failed");
return FALSE;
}
stream->avContext->width = WINPR_ASSERTING_INT_CAST(int, stream->currMediaType.Width);
stream->avContext->height = WINPR_ASSERTING_INT_CAST(int, stream->currMediaType.Height);
/* The AV_EF_EXPLODE flag aborts decoding and returns an error as soon as
 * a minor error is detected, so corrupted frames, if any, can be skipped */
stream->avContext->err_recognition |= AV_EF_EXPLODE;
if (avcodec_open2(stream->avContext, avcodec, NULL) < 0)
{
WLog_ERR(TAG, "avcodec_open2 failed");
return FALSE;
}
stream->avInputPkt = av_packet_alloc();
if (!stream->avInputPkt)
{
WLog_ERR(TAG, "av_packet_alloc failed");
return FALSE;
}
stream->avOutFrame = av_frame_alloc();
if (!stream->avOutFrame)
{
WLog_ERR(TAG, "av_frame_alloc failed");
return FALSE;
}
return TRUE;
}
#endif
/**
* Function description
*
* @return success/failure
*/
static BOOL ecam_encoder_context_init_h264(CameraDeviceStream* stream)
{
WINPR_ASSERT(stream);
#if defined(WITH_INPUT_FORMAT_H264)
if (streamInputFormat(stream) == CAM_MEDIA_FORMAT_MJPG_H264)
{
stream->h264FrameMaxSize = 1ULL * stream->currMediaType.Width *
stream->currMediaType.Height; /* 1 byte per pixel */
stream->h264Frame = (BYTE*)calloc(stream->h264FrameMaxSize, sizeof(BYTE));
if (!stream->h264Frame)
{
WLog_ERR(TAG, "calloc failed for h264Frame");
return FALSE;
}
return TRUE; /* encoder not needed */
}
#endif
if (!stream->h264)
stream->h264 = h264_context_new(TRUE);
if (!stream->h264)
{
WLog_ERR(TAG, "h264_context_new failed");
return FALSE;
}
if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_USAGETYPE,
H264_CAMERA_VIDEO_REAL_TIME))
goto fail;
if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_FRAMERATE,
stream->currMediaType.FrameRateNumerator /
stream->currMediaType.FrameRateDenominator))
goto fail;
if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_BITRATE,
h264_get_max_bitrate(stream->currMediaType.Height)))
goto fail;
/* Using CQP mode for rate control. It produces more comparable quality
* between VAAPI and software encoding than VBR mode
*/
if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_RATECONTROL,
H264_RATECONTROL_CQP))
goto fail;
/* Using 26 as the CQP value. Lower values produce better quality at a
 * higher bitrate; higher values reduce the bitrate but degrade quality.
 */
if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_QP, 26))
goto fail;
/* Requesting hardware acceleration before calling h264_context_reset */
if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_HW_ACCEL, TRUE))
goto fail;
if (!h264_context_reset(stream->h264, stream->currMediaType.Width,
stream->currMediaType.Height))
{
WLog_ERR(TAG, "h264_context_reset failed");
goto fail;
}
#if defined(WITH_INPUT_FORMAT_MJPG)
if (streamInputFormat(stream) == CAM_MEDIA_FORMAT_MJPG && !ecam_init_mjpeg_decoder(stream))
goto fail;
#endif
return TRUE;
fail:
ecam_encoder_context_free_h264(stream);
return FALSE;
}
/**
* Function description
*
* @return success/failure
*/
BOOL ecam_encoder_context_init(CameraDeviceStream* stream)
{
CAM_MEDIA_FORMAT format = streamOutputFormat(stream);
switch (format)
{
case CAM_MEDIA_FORMAT_H264:
return ecam_encoder_context_init_h264(stream);
default:
WLog_ERR(TAG, "Unsupported output format %d", format);
return FALSE;
}
}
/**
* Function description
*
* @return success/failure
*/
BOOL ecam_encoder_context_free(CameraDeviceStream* stream)
{
CAM_MEDIA_FORMAT format = streamOutputFormat(stream);
switch (format)
{
case CAM_MEDIA_FORMAT_H264:
ecam_encoder_context_free_h264(stream);
break;
default:
return FALSE;
}
return TRUE;
}
/**
* Function description
*
* @return success/failure
*/
BOOL ecam_encoder_compress(CameraDeviceStream* stream, const BYTE* srcData, size_t srcSize,
BYTE** ppDstData, size_t* pDstSize)
{
CAM_MEDIA_FORMAT format = streamOutputFormat(stream);
switch (format)
{
case CAM_MEDIA_FORMAT_H264:
return ecam_encoder_compress_h264(stream, srcData, srcSize, ppDstData, pDstSize);
default:
WLog_ERR(TAG, "Unsupported output format %d", format);
return FALSE;
}
}
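/* A minimal sketch of the expected call sequence, assuming a caller that
 * owns the stream; frameData, frameSize and send_sample() are hypothetical:
 *
 * CameraDeviceStream* stream = ...; // currMediaType already negotiated
 * if (!ecam_encoder_context_init(stream))
 * return ERROR_INTERNAL_ERROR;
 * // per captured frame:
 * BYTE* dstData = NULL;
 * size_t dstSize = 0;
 * if (ecam_encoder_compress(stream, frameData, frameSize, &dstData, &dstSize))
 * send_sample(dstData, dstSize);
 * // on stream stop:
 * ecam_encoder_context_free(stream);
 */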