/*******************************************************************************
 * Copyright 2020 Intel Corporation.
 *
 *
 * This software and the related documents are Intel copyrighted materials, and your use of them is governed by
 * the express license under which they were provided to you ('License'). Unless the License provides otherwise,
 * you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related
 * documents without Intel's prior written permission.
 * This software and the related documents are provided as is, with no express or implied warranties, other than
 * those that are expressly stated in the License.
 *******************************************************************************/

/* Intel(R) Integrated Performance Primitives (Intel(R) IPP) */

#include "pifmedian_t.h"

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ownGetMedianSliceSize
//
//  Purpose:            Splits image to equal slices depending on ROI size and number of threads.
//                      Last slice can be slightly bigger than others.
//
//  Parameters:
//   roiSize            Size of destination ROI in pixels.
//   maskSize           Size of filter mask.
//   pTileSize          Size of single slice
//   lastTileSize       Size of last slice
//   splitImage         Amount of splits in x- and y- directions
//
//  Return Values:
//   void
*/
void ownGetMedianSliceSize(IppiSize roiSize, IppiSize maskSize, IppiSize *pTileSize, IppiSize *lastTileSize, IppiPoint *splitImage)
{
    /* Requested slice: the full ROI width and at most TYLE_S rows */
    IppiSize requestedSlice;
    requestedSlice.width = roiSize.width;
    requestedSlice.height = IPP_MIN(TYLE_S, roiSize.height);

    /* Start from a single slice in each direction; ippiSplitToTiles_T receives this as its split argument */
    splitImage->y = 1;
    splitImage->x = 1;

    /* Compute the split together with the regular and last slice sizes */
    ippiSplitToTiles_T(roiSize, requestedSlice, splitImage, pTileSize, lastTileSize);
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ippiFilterMedianBorderGetBufferSize_T
//
//  Purpose:            Computes the size of the external buffer for median filter with border
//
//  Parameters:
//   roiSize            Size of destination ROI in pixels.
//   maskSize           Size of filter mask.
//   dataType           Data type of the source and destination images.
//   numChannels        Number of channels in the images. Possible values are 1, 3 or 4.
//   pBufferSize        Pointer to the size (in bytes) of the external work buffer.
//
//  Return Values:
//   ippStsNoErr        Indicates no error.
//   ippStsNullPtrErr   Indicates an error when pBufferSize is NULL.
//   ippStsSizeErr      Indicates an error when roiSize has a field with negative or zero value.
//   ippStsMaskSizeErr  Indicates an error when maskSize has a field with negative, zero or even value.
//   ippStsDataTypeErr  Indicates an error when dataType has an illegal value.
//   ippStsNumChannelsErr Indicates an error when numChannels has an illegal value.
*/
/*
 * Computes the work buffer size for the threaded median filter.
 *
 * When the image is processed by a single call (one thread, or threading disabled by the
 * size heuristics below) the size reported by ippiFilterMedianBorderGetBufferSize for the
 * whole ROI is returned. Otherwise the size needed for the largest slice is multiplied by
 * the number of threads, so that each thread can use its own part of the buffer.
 *
 * In addition to the statuses listed in the header comment of this function, ippStsSizeErr
 * is returned when the total buffer size does not fit into an int.
 */
IPPFUN(IppStatus, ippiFilterMedianBorderGetBufferSize_T,
       (IppiSize roiSize, IppiSize maskSize, IppDataType dataType, int numChannels, int *pBufferSize))
{
    /* Argument validation */
    if (pBufferSize == 0)
        return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0)
        return ippStsSizeErr;
    if ((maskSize.width <= 0) || (maskSize.height <= 0))
        return ippStsMaskSizeErr;
    /* both mask dimensions must be odd */
    if (!(maskSize.width & maskSize.height & 1))
        return ippStsMaskSizeErr;
    if ((numChannels != 1) && (numChannels != 3) && (numChannels != 4))
        return ippStsNumChannelsErr;
    if ((dataType != ipp8u) && (dataType != ipp16s) && (dataType != ipp16u)) {
        /* ipp32f is accepted for single-channel images only */
        if ((dataType != ipp32f) || (numChannels != 1))
            return ippStsDataTypeErr;
    }

    Ipp32s numThreads = 1;
    ippGetNumThreads_T(&numThreads);

    IppiSize sliceSize, lastSliceSize;
    IppiPoint splitImage;
    ownGetMedianSliceSize(roiSize, maskSize, &sliceSize, &lastSliceSize, &splitImage);

    /* Height thresholds are doubled for single-channel images */
    int channelMult = (numChannels == 1) ? 2 : 1;

    /* Threading is skipped for a single slice or when the ROI height is below the limit for the mask-size class */
    Ipp8u disableThreading =
        ((splitImage.y == 1) || ((MASK_SIZE_P1 < SMALL_MASK_SIZE_LIMIT) && (roiSize.height < (WIDTH_LIMIT_1 * channelMult))) ||
         ((SMALL_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < MED_MASK_SIZE_LIMIT) && (roiSize.height < (WIDTH_LIMIT_2 * channelMult))) ||
         ((MED_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < LARGE_MASK_SIZE_LIMIT) && (roiSize.height < (WIDTH_LIMIT_3 * channelMult))) ||
         ((MASK_SIZE_P1 >= LARGE_MASK_SIZE_LIMIT) && (roiSize.height < (WIDTH_LIMIT_4))));

    if ((numThreads == 1) || disableThreading)
        return ippiFilterMedianBorderGetBufferSize(roiSize, maskSize, dataType, numChannels, pBufferSize);

    /* Size the per-thread buffer for the largest slice that can occur */
    IppiSize maxSliceSize = {IPP_MAX(sliceSize.width, lastSliceSize.width), IPP_MAX(sliceSize.height, lastSliceSize.height)};

    int bufferSize = 0;
    IppStatus status = ippiFilterMedianBorderGetBufferSize(maxSliceSize, maskSize, dataType, numChannels, &bufferSize);

    if (status >= 0) {
        /* Largest int value, spelled without <limits.h> */
        const int maxInt = (int)(~0u >> 1);

        /* Reject a total that would overflow int instead of reporting a wrapped size */
        if ((bufferSize > 0) && (numThreads > 0) && (bufferSize > maxInt / (int)numThreads))
            return ippStsSizeErr;

        *pBufferSize = bufferSize * ((int)numThreads);
    }

    return status;
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ippiFilterMedianBorder_8u_C1R_T_Fun
//                      ippiFilterMedianBorder_8u_C3R_T_Fun
//                      ippiFilterMedianBorder_8u_AC4R_T_Fun
//                      ippiFilterMedianBorder_8u_C4R_T_Fun
//
//  Purpose:            Kernels to be called in parallel_for of Threading Layer - performs median filtering
//                      of particular slice of the image.
//
//  Parameters:
//   t                  thread index
//   arg                pointer to the Filter Median threading structure
//
//  Return Values:
//   ippStsNoErr        Indicates no error.
//   ippStsNullPtrErr   Indicates an error when pSrc, pDst or pBuffer is NULL.
//   ippStsSizeErr      Indicates an error when roiSize has a field with negative or zero value.
//   ippStsMaskSizeErr  Indicates an error when maskSize has a field with negative, zero or even value.
//   ippStsNotEvenStepErr Indicates an error when one of the step values is not divisible by 4
//                      for floating-point images, or by 2 for short-integer images.
//   ippStsBorderErr    Indicates an error when borderType has illegal value.
*/
static IppStatus ippiFilterMedianBorder_8u_C1R_T_Fun(int t, void *arg)
{
    IppStatus status = ippStsNoErr;

    ippiFilterMedian_8u_C1R_T_Str *ts = (ippiFilterMedian_8u_C1R_T_Str *)arg;

    const Ipp8u *pSrc = (const Ipp8u *)ts->pSrc;
    int srcStep = ts->srcStep;
    Ipp8u *pDst = ts->pDst;
    int dstStep = ts->dstStep;
    IppiSize maskSize = ts->maskSize;
    IppiBorderType border = ts->borderType;
    Ipp8u borderValue = ts->borderValue;
    Ipp8u *pBuffer = ts->pBuffer;
    IppiPoint splitImage = ts->splitImage;
    IppiSize sliceSize = ts->sliceSize;
    IppiSize lastSliceSize = ts->lastSliceSize;
    int sliceBufferSize = ts->sliceBufferSize;

    int tWidth = sliceSize.width;
    int tHeight = sliceSize.height;

    IppiSize roiSize;
    int tx, ty; /* slice coordinates */
    IppiBorderType borderTrd = border;
    IppiBorderType borderTrdW = borderTrd;

    int threadIdx = 0;
    ippGetThreadIdx_T(&threadIdx);

    ty = t / splitImage.x;
    tx = t % splitImage.x;

    roiSize.height = tHeight;
    if (lastSliceSize.height && (ty == (int)(splitImage.y - 1)))
        roiSize.height = lastSliceSize.height;
    roiSize.width = tWidth;
    if (lastSliceSize.width && (tx == (int)(splitImage.x - 1)))
        roiSize.width = lastSliceSize.width;

    pBuffer = pBuffer + sliceBufferSize * threadIdx;

    Ipp8u *pSliceSrc = (Ipp8u *)((Ipp8u *)(pSrc + tx * tWidth) + ty * tHeight * srcStep);
    Ipp8u *pSliceDst = (Ipp8u *)((Ipp8u *)(pDst + tx * tWidth) + ty * tHeight * dstStep);

    if ((splitImage.y > 1)) {
        if (ty == 0)
            borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemBottom);
        else if (ty == (int)(splitImage.y - 1))
            borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemTop);
        else
            borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemBottom | (int)ippBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (tx == 0)
            borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemRight);
        else if (tx == (int)(splitImage.x - 1))
            borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemLeft);
        else
            borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemRight | (int)ippBorderInMemLeft);
    }

    /* Intel IPP function call */
    status = ippiFilterMedianBorder_8u_C1R((const Ipp8u *)pSliceSrc, srcStep, (Ipp8u *)pSliceDst, dstStep, roiSize, maskSize, borderTrdW, borderValue,
                                           pBuffer);
    return status;
}

/*
 * Threading Layer kernel for the 8u C3R median filter: processes the slice with index t.
 *
 *   t    slice index, decoded row-major into (tx, ty) on the splitImage.x by splitImage.y grid
 *   arg  pointer to an ippiFilterMedian_8u_C3R_AC4R_T_Str describing the whole operation
 *
 * Returns the status reported by ippiFilterMedianBorder_8u_C3R for this slice.
 */
static IppStatus ippiFilterMedianBorder_8u_C3R_T_Fun(int t, void *arg)
{
    ippiFilterMedian_8u_C3R_AC4R_T_Str *ts = (ippiFilterMedian_8u_C3R_AC4R_T_Str *)arg;

    const int numChannels = 3;

    const Ipp8u *pSrc = (const Ipp8u *)ts->pSrc;
    Ipp8u *pDst = ts->pDst;
    const int srcStep = ts->srcStep;
    const int dstStep = ts->dstStep;
    const IppiPoint splitImage = ts->splitImage;
    const IppiSize sliceSize = ts->sliceSize;
    const IppiSize lastSliceSize = ts->lastSliceSize;

    /* Local copy of the three per-channel border values */
    Ipp8u borderValue[3] = {ts->borderValue[0], ts->borderValue[1], ts->borderValue[2]};

    /* slice coordinates */
    const int ty = t / splitImage.x;
    const int tx = t % splitImage.x;
    const int isLastRow = (ty == (int)(splitImage.y - 1));
    const int isLastCol = (tx == (int)(splitImage.x - 1));

    /* The last slice in each direction uses lastSliceSize unless that field is zero */
    IppiSize roiSize = sliceSize;
    if (lastSliceSize.height && isLastRow)
        roiSize.height = lastSliceSize.height;
    if (lastSliceSize.width && isLastCol)
        roiSize.width = lastSliceSize.width;

    /* Each thread works in its own part of the shared buffer */
    int threadIdx = 0;
    ippGetThreadIdx_T(&threadIdx);
    Ipp8u *pSliceBuffer = ts->pBuffer + ts->sliceBufferSize * threadIdx;

    /* The row offset is computed in 64 bits: ty * height * step can exceed the range of int for large images */
    const long long firstRow = (long long)ty * sliceSize.height;
    const Ipp8u *pSliceSrc = pSrc + tx * sliceSize.width * numChannels + firstRow * srcStep;
    Ipp8u *pSliceDst = pDst + tx * sliceSize.width * numChannels + firstRow * dstStep;

    /* Sides that adjoin another slice get the corresponding ippBorderInMem* flag added to the caller's border type */
    int borderFlags = (int)ts->borderType;
    if (splitImage.y > 1) {
        if (ty != 0)
            borderFlags |= (int)ippBorderInMemTop;
        if (!isLastRow)
            borderFlags |= (int)ippBorderInMemBottom;
    }
    if (splitImage.x > 1) {
        if (tx != 0)
            borderFlags |= (int)ippBorderInMemLeft;
        if (!isLastCol)
            borderFlags |= (int)ippBorderInMemRight;
    }

    /* Intel IPP function call */
    return ippiFilterMedianBorder_8u_C3R(pSliceSrc, srcStep, pSliceDst, dstStep, roiSize, ts->maskSize, (IppiBorderType)borderFlags, borderValue,
                                         pSliceBuffer);
}

static IppStatus ippiFilterMedianBorder_8u_AC4R_T_Fun(int t, void *arg)
{
    IppStatus status = ippStsNoErr;

    ippiFilterMedian_8u_C3R_AC4R_T_Str *ts = (ippiFilterMedian_8u_C3R_AC4R_T_Str *)arg;

    const Ipp8u *pSrc = (const Ipp8u *)ts->pSrc;
    int srcStep = ts->srcStep;
    Ipp8u *pDst = ts->pDst;
    int dstStep = ts->dstStep;
    IppiSize maskSize = ts->maskSize;
    IppiBorderType border = ts->borderType;
    Ipp8u borderValue[3] = {ts->borderValue[0], ts->borderValue[1], ts->borderValue[2]};
    Ipp8u *pBuffer = ts->pBuffer;
    IppiPoint splitImage = ts->splitImage;
    IppiSize sliceSize = ts->sliceSize;
    IppiSize lastSliceSize = ts->lastSliceSize;
    int sliceBufferSize = ts->sliceBufferSize;

    int tWidth = sliceSize.width;
    int tHeight = sliceSize.height;

    IppiSize roiSize;
    int numChannels = 4;
    int tx, ty; /* slice coordinates */
    IppiBorderType borderTrd = border;
    IppiBorderType borderTrdW = borderTrd;

    int threadIdx = 0;
    ippGetThreadIdx_T(&threadIdx);

    ty = t / splitImage.x;
    tx = t % splitImage.x;

    roiSize.height = tHeight;
    if (lastSliceSize.height && (ty == (int)(splitImage.y - 1)))
        roiSize.height = lastSliceSize.height;
    roiSize.width = tWidth;
    if (lastSliceSize.width && (tx == (int)(splitImage.x - 1)))
        roiSize.width = lastSliceSize.width;

    pBuffer = pBuffer + sliceBufferSize * threadIdx;

    Ipp8u *pSliceSrc = (Ipp8u *)((Ipp8u *)(pSrc + tx * tWidth * numChannels) + ty * tHeight * srcStep);
    Ipp8u *pSliceDst = (Ipp8u *)((Ipp8u *)(pDst + tx * tWidth * numChannels) + ty * tHeight * dstStep);

    if ((splitImage.y > 1)) {
        if (ty == 0)
            borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemBottom);
        else if (ty == (int)(splitImage.y - 1))
            borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemTop);
        else
            borderTrd = (IppiBorderType)((int)border | (int)ippBorderInMemBottom | (int)ippBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (tx == 0)
            borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemRight);
        else if (tx == (int)(splitImage.x - 1))
            borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemLeft);
        else
            borderTrdW = (IppiBorderType)((int)borderTrd | (int)ippBorderInMemRight | (int)ippBorderInMemLeft);
    }

    /* Intel IPP function call */
    status = ippiFilterMedianBorder_8u_AC4R((const Ipp8u *)pSliceSrc, srcStep, (Ipp8u *)pSliceDst, dstStep, roiSize, maskSize, borderTrdW,
                                            borderValue, pBuffer);
    return status;
}

/*
 * Threading Layer kernel for the 8u C4R median filter: processes the slice with index t.
 *
 *   t    slice index, decoded row-major into (tx, ty) on the splitImage.x by splitImage.y grid
 *   arg  pointer to an ippiFilterMedian_8u_C4R_T_Str describing the whole operation
 *
 * Returns the status reported by ippiFilterMedianBorder_8u_C4R for this slice.
 */
static IppStatus ippiFilterMedianBorder_8u_C4R_T_Fun(int t, void *arg)
{
    ippiFilterMedian_8u_C4R_T_Str *ts = (ippiFilterMedian_8u_C4R_T_Str *)arg;

    const int numChannels = 4;

    const Ipp8u *pSrc = (const Ipp8u *)ts->pSrc;
    Ipp8u *pDst = ts->pDst;
    const int srcStep = ts->srcStep;
    const int dstStep = ts->dstStep;
    const IppiPoint splitImage = ts->splitImage;
    const IppiSize sliceSize = ts->sliceSize;
    const IppiSize lastSliceSize = ts->lastSliceSize;

    /* Local copy of the four per-channel border values */
    Ipp8u borderValue[4] = {ts->borderValue[0], ts->borderValue[1], ts->borderValue[2], ts->borderValue[3]};

    /* slice coordinates */
    const int ty = t / splitImage.x;
    const int tx = t % splitImage.x;
    const int isLastRow = (ty == (int)(splitImage.y - 1));
    const int isLastCol = (tx == (int)(splitImage.x - 1));

    /* The last slice in each direction uses lastSliceSize unless that field is zero */
    IppiSize roiSize = sliceSize;
    if (lastSliceSize.height && isLastRow)
        roiSize.height = lastSliceSize.height;
    if (lastSliceSize.width && isLastCol)
        roiSize.width = lastSliceSize.width;

    /* Each thread works in its own part of the shared buffer */
    int threadIdx = 0;
    ippGetThreadIdx_T(&threadIdx);
    Ipp8u *pSliceBuffer = ts->pBuffer + ts->sliceBufferSize * threadIdx;

    /* The row offset is computed in 64 bits: ty * height * step can exceed the range of int for large images */
    const long long firstRow = (long long)ty * sliceSize.height;
    const Ipp8u *pSliceSrc = pSrc + tx * sliceSize.width * numChannels + firstRow * srcStep;
    Ipp8u *pSliceDst = pDst + tx * sliceSize.width * numChannels + firstRow * dstStep;

    /* Sides that adjoin another slice get the corresponding ippBorderInMem* flag added to the caller's border type */
    int borderFlags = (int)ts->borderType;
    if (splitImage.y > 1) {
        if (ty != 0)
            borderFlags |= (int)ippBorderInMemTop;
        if (!isLastRow)
            borderFlags |= (int)ippBorderInMemBottom;
    }
    if (splitImage.x > 1) {
        if (tx != 0)
            borderFlags |= (int)ippBorderInMemLeft;
        if (!isLastCol)
            borderFlags |= (int)ippBorderInMemRight;
    }

    /* Intel IPP function call */
    return ippiFilterMedianBorder_8u_C4R(pSliceSrc, srcStep, pSliceDst, dstStep, roiSize, ts->maskSize, (IppiBorderType)borderFlags, borderValue,
                                         pSliceBuffer);
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:               ippiFilterMedianBorder_8u_C1R_T
//                      ippiFilterMedianBorder_8u_C3R_T
//                      ippiFilterMedianBorder_8u_AC4R_T
//                      ippiFilterMedianBorder_8u_C4R_T
//  Purpose:            Perform median filtering of an image with border
//
//  Parameters:
//   pSrc               Pointer to the source image ROI.
//   srcStep            Distance in bytes between starting points of consecutive lines in the source image.
//   pDst               Pointer to the destination image ROI.
//   dstStep            Distance in bytes between starting points of consecutive lines in the destination image.
//   dstRoiSize         Size of destination ROI in pixels.
//   maskSize           Size of filter mask.
//   borderType         Type of border.
//   borderValue        Constant value to assign to pixels of the constant border. This parameter is applicable
//                      only to the ippBorderConst border type.
//   pBorderValue       Pointer to constant value to assign to pixels of the constant border. This parameter is applicable
//                      only to the ippBorderConst border type.
//   pBuffer            Pointer to the work buffer.
//
//  Return Values:
//   ippStsNoErr        Indicates no error.
//   ippStsNullPtrErr   Indicates an error when pSrc, pDst or pBuffer is NULL.
//   ippStsSizeErr      Indicates an error when roiSize has a field with negative or zero value.
//   ippStsMaskSizeErr  Indicates an error when maskSize has a field with negative, zero or even value.
//   ippStsNotEvenStepErr Indicates an error when one of the step values is not divisible by 4
//                      for floating-point images, or by 2 for short-integer images.
//   ippStsBorderErr    Indicates an error when borderType has illegal value.
*/

/*
 * Threaded median filter with border, 8u single-channel image.
 *
 * Falls back to one ippiFilterMedianBorder_8u_C1R call on the whole ROI when only one thread
 * is available or the size heuristics disable threading. Otherwise the ROI is split into
 * slices that are filtered by ippiFilterMedianBorder_8u_C1R_T_Fun through ippParallelFor_T.
 * pBuffer is expected to be sized by ippiFilterMedianBorderGetBufferSize_T.
 */
IPPFUN(IppStatus, ippiFilterMedianBorder_8u_C1R_T,
       (const Ipp8u *pSrc, int srcStep, Ipp8u *pDst, int dstStep, IppiSize dstRoiSize, IppiSize maskSize, IppiBorderType borderType,
        Ipp8u borderValue, Ipp8u *pBuffer))
{
    /* Argument validation */
    if (pSrc == 0 || pDst == 0 || pBuffer == 0)
        return ippStsNullPtrErr;
    if (dstRoiSize.height <= 0 || dstRoiSize.width <= 0)
        return ippStsSizeErr;
    if ((maskSize.width <= 0) || (maskSize.height <= 0))
        return ippStsMaskSizeErr;
    /* both mask dimensions must be odd */
    if (!(maskSize.width & maskSize.height & 1))
        return ippStsMaskSizeErr;

    const int numChannels = 1;
    /* Height thresholds are doubled for single-channel images, as in ippiFilterMedianBorderGetBufferSize_T */
    const int channelMult = 2;

    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);

    IppiSize sliceSize, lastSliceSize;
    IppiPoint splitImage;
    ownGetMedianSliceSize(dstRoiSize, maskSize, &sliceSize, &lastSliceSize, &splitImage);

    /* Threading is skipped for a single slice or when the ROI height is below the limit for the mask-size class */
    const int disableThreading =
        ((splitImage.y == 1) || ((MASK_SIZE_P1 < SMALL_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_1 * channelMult))) ||
         ((SMALL_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < MED_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_2 * channelMult))) ||
         ((MED_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_3 * channelMult))) ||
         ((MASK_SIZE_P1 >= LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_4))));

    if ((numThreads == 1) || disableThreading)
        return ippiFilterMedianBorder_8u_C1R(pSrc, srcStep, pDst, dstStep, dstRoiSize, maskSize, borderType, borderValue, pBuffer);

    /* The per-thread buffer stride must match ippiFilterMedianBorderGetBufferSize_T, which sizes it for the larger of the two slice sizes */
    IppiSize maxSliceSize;
    maxSliceSize.width = (sliceSize.width > lastSliceSize.width) ? sliceSize.width : lastSliceSize.width;
    maxSliceSize.height = (sliceSize.height > lastSliceSize.height) ? sliceSize.height : lastSliceSize.height;

    int bufferSize = 0;
    IppStatus status = ippiFilterMedianBorderGetBufferSize(maxSliceSize, maskSize, ipp8u, numChannels, &bufferSize);
    if (status < 0)
        return status;

    /* Pack the arguments for the kernel and run one kernel call per slice */
    ippiFilterMedian_8u_C1R_T_Str ts;
    filterMedianThreadingStructureEncode_8u_C1R((Ipp8u *)pSrc, srcStep, pDst, dstStep, maskSize, borderType, borderValue, pBuffer, splitImage,
                                                sliceSize, lastSliceSize, bufferSize, &ts);

    return ippParallelFor_T(splitImage.x * splitImage.y, (void *)&ts, ippiFilterMedianBorder_8u_C1R_T_Fun);
}

/*
 * Threaded median filter with border, 8u three-channel image.
 *
 * Falls back to one ippiFilterMedianBorder_8u_C3R call on the whole ROI when only one thread
 * is available or the size heuristics disable threading. Otherwise the ROI is split into
 * slices that are filtered by ippiFilterMedianBorder_8u_C3R_T_Fun through ippParallelFor_T.
 * pBuffer is expected to be sized by ippiFilterMedianBorderGetBufferSize_T.
 */
IPPFUN(IppStatus, ippiFilterMedianBorder_8u_C3R_T,
       (const Ipp8u *pSrc, int srcStep, Ipp8u *pDst, int dstStep, IppiSize dstRoiSize, IppiSize maskSize, IppiBorderType borderType,
        const Ipp8u borderValue[3], Ipp8u *pBuffer))
{
    /* Argument validation */
    if (pSrc == 0 || pDst == 0 || pBuffer == 0)
        return ippStsNullPtrErr;
    if (dstRoiSize.height <= 0 || dstRoiSize.width <= 0)
        return ippStsSizeErr;
    if ((maskSize.width <= 0) || (maskSize.height <= 0))
        return ippStsMaskSizeErr;
    /* both mask dimensions must be odd */
    if (!(maskSize.width & maskSize.height & 1))
        return ippStsMaskSizeErr;

    const int numChannels = 3;

    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);

    IppiSize sliceSize, lastSliceSize;
    IppiPoint splitImage;
    ownGetMedianSliceSize(dstRoiSize, maskSize, &sliceSize, &lastSliceSize, &splitImage);

    /* Threading is skipped for a single slice or when the ROI height is below the limit for the mask-size class */
    const int disableThreading =
        ((splitImage.y == 1) || ((MASK_SIZE_P1 < SMALL_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_1))) ||
         ((SMALL_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < MED_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_2))) ||
         ((MED_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_3))) ||
         ((MASK_SIZE_P1 >= LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_4))));

    if ((numThreads == 1) || disableThreading)
        return ippiFilterMedianBorder_8u_C3R(pSrc, srcStep, pDst, dstStep, dstRoiSize, maskSize, borderType, borderValue, pBuffer);

    /* The per-thread buffer stride must match ippiFilterMedianBorderGetBufferSize_T, which sizes it for the larger of the two slice sizes */
    IppiSize maxSliceSize;
    maxSliceSize.width = (sliceSize.width > lastSliceSize.width) ? sliceSize.width : lastSliceSize.width;
    maxSliceSize.height = (sliceSize.height > lastSliceSize.height) ? sliceSize.height : lastSliceSize.height;

    int bufferSize = 0;
    IppStatus status = ippiFilterMedianBorderGetBufferSize(maxSliceSize, maskSize, ipp8u, numChannels, &bufferSize);
    if (status < 0)
        return status;

    /* Pack the arguments for the kernel and run one kernel call per slice */
    ippiFilterMedian_8u_C3R_AC4R_T_Str ts;
    filterMedianThreadingStructureEncode_8u_C3R_AC4R((Ipp8u *)pSrc, srcStep, pDst, dstStep, maskSize, borderType, borderValue, pBuffer,
                                                     splitImage, sliceSize, lastSliceSize, bufferSize, &ts);

    return ippParallelFor_T(splitImage.x * splitImage.y, (void *)&ts, ippiFilterMedianBorder_8u_C3R_T_Fun);
}

/*
 * Threaded median filter with border, 8u AC4 image (four channels per pixel, three border values).
 *
 * Falls back to one ippiFilterMedianBorder_8u_AC4R call on the whole ROI when only one thread
 * is available or the size heuristics disable threading. Otherwise the ROI is split into
 * slices that are filtered by ippiFilterMedianBorder_8u_AC4R_T_Fun through ippParallelFor_T.
 * pBuffer is expected to be sized by ippiFilterMedianBorderGetBufferSize_T.
 */
IPPFUN(IppStatus, ippiFilterMedianBorder_8u_AC4R_T,
       (const Ipp8u *pSrc, int srcStep, Ipp8u *pDst, int dstStep, IppiSize dstRoiSize, IppiSize maskSize, IppiBorderType borderType,
        const Ipp8u borderValue[3], Ipp8u *pBuffer))
{
    /* Argument validation */
    if (pSrc == 0 || pDst == 0 || pBuffer == 0)
        return ippStsNullPtrErr;
    if (dstRoiSize.height <= 0 || dstRoiSize.width <= 0)
        return ippStsSizeErr;
    if ((maskSize.width <= 0) || (maskSize.height <= 0))
        return ippStsMaskSizeErr;
    /* both mask dimensions must be odd */
    if (!(maskSize.width & maskSize.height & 1))
        return ippStsMaskSizeErr;

    const int numChannels = 4;

    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);

    IppiSize sliceSize, lastSliceSize;
    IppiPoint splitImage;
    ownGetMedianSliceSize(dstRoiSize, maskSize, &sliceSize, &lastSliceSize, &splitImage);

    /* Threading is skipped for a single slice or when the ROI height is below the limit for the mask-size class */
    const int disableThreading =
        ((splitImage.y == 1) || ((MASK_SIZE_P1 < SMALL_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_1))) ||
         ((SMALL_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < MED_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_2))) ||
         ((MED_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_3))) ||
         ((MASK_SIZE_P1 >= LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_4))));

    if ((numThreads == 1) || disableThreading)
        return ippiFilterMedianBorder_8u_AC4R(pSrc, srcStep, pDst, dstStep, dstRoiSize, maskSize, borderType, borderValue, pBuffer);

    /* The per-thread buffer stride must match ippiFilterMedianBorderGetBufferSize_T, which sizes it for the larger of the two slice sizes */
    IppiSize maxSliceSize;
    maxSliceSize.width = (sliceSize.width > lastSliceSize.width) ? sliceSize.width : lastSliceSize.width;
    maxSliceSize.height = (sliceSize.height > lastSliceSize.height) ? sliceSize.height : lastSliceSize.height;

    int bufferSize = 0;
    IppStatus status = ippiFilterMedianBorderGetBufferSize(maxSliceSize, maskSize, ipp8u, numChannels, &bufferSize);
    if (status < 0)
        return status;

    /* Pack the arguments for the kernel and run one kernel call per slice */
    ippiFilterMedian_8u_C3R_AC4R_T_Str ts;
    filterMedianThreadingStructureEncode_8u_C3R_AC4R((Ipp8u *)pSrc, srcStep, pDst, dstStep, maskSize, borderType, borderValue, pBuffer,
                                                     splitImage, sliceSize, lastSliceSize, bufferSize, &ts);

    return ippParallelFor_T(splitImage.x * splitImage.y, (void *)&ts, ippiFilterMedianBorder_8u_AC4R_T_Fun);
}

/*
 * Threaded median filter with border, 8u four-channel image.
 *
 * Falls back to one ippiFilterMedianBorder_8u_C4R call on the whole ROI when only one thread
 * is available or the size heuristics disable threading. Otherwise the ROI is split into
 * slices that are filtered by ippiFilterMedianBorder_8u_C4R_T_Fun through ippParallelFor_T.
 * pBuffer is expected to be sized by ippiFilterMedianBorderGetBufferSize_T.
 */
IPPFUN(IppStatus, ippiFilterMedianBorder_8u_C4R_T,
       (const Ipp8u *pSrc, int srcStep, Ipp8u *pDst, int dstStep, IppiSize dstRoiSize, IppiSize maskSize, IppiBorderType borderType,
        const Ipp8u borderValue[4], Ipp8u *pBuffer))
{
    /* Argument validation */
    if (pSrc == 0 || pDst == 0 || pBuffer == 0)
        return ippStsNullPtrErr;
    if (dstRoiSize.height <= 0 || dstRoiSize.width <= 0)
        return ippStsSizeErr;
    if ((maskSize.width <= 0) || (maskSize.height <= 0))
        return ippStsMaskSizeErr;
    /* both mask dimensions must be odd */
    if (!(maskSize.width & maskSize.height & 1))
        return ippStsMaskSizeErr;

    const int numChannels = 4;

    int numThreads = 1;
    ippGetNumThreads_T(&numThreads);

    IppiSize sliceSize, lastSliceSize;
    IppiPoint splitImage;
    ownGetMedianSliceSize(dstRoiSize, maskSize, &sliceSize, &lastSliceSize, &splitImage);

    /* Threading is skipped for a single slice or when the ROI height is below the limit for the mask-size class */
    const int disableThreading =
        ((splitImage.y == 1) || ((MASK_SIZE_P1 < SMALL_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_1))) ||
         ((SMALL_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < MED_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_2))) ||
         ((MED_MASK_SIZE_LIMIT <= MASK_SIZE_P1) && (MASK_SIZE_P1 < LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_3))) ||
         ((MASK_SIZE_P1 >= LARGE_MASK_SIZE_LIMIT) && (dstRoiSize.height < (WIDTH_LIMIT_4))));

    if ((numThreads == 1) || disableThreading)
        return ippiFilterMedianBorder_8u_C4R(pSrc, srcStep, pDst, dstStep, dstRoiSize, maskSize, borderType, borderValue, pBuffer);

    /* The per-thread buffer stride must match ippiFilterMedianBorderGetBufferSize_T, which sizes it for the larger of the two slice sizes */
    IppiSize maxSliceSize;
    maxSliceSize.width = (sliceSize.width > lastSliceSize.width) ? sliceSize.width : lastSliceSize.width;
    maxSliceSize.height = (sliceSize.height > lastSliceSize.height) ? sliceSize.height : lastSliceSize.height;

    int bufferSize = 0;
    IppStatus status = ippiFilterMedianBorderGetBufferSize(maxSliceSize, maskSize, ipp8u, numChannels, &bufferSize);
    if (status < 0)
        return status;

    /* Pack the arguments for the kernel and run one kernel call per slice */
    ippiFilterMedian_8u_C4R_T_Str ts;
    filterMedianThreadingStructureEncode_8u_C4R((Ipp8u *)pSrc, srcStep, pDst, dstStep, maskSize, borderType, borderValue, pBuffer, splitImage,
                                                sliceSize, lastSliceSize, bufferSize, &ts);

    return ippParallelFor_T(splitImage.x * splitImage.y, (void *)&ts, ippiFilterMedianBorder_8u_C4R_T_Fun);
}
