// You are on page 1 of 37

#include <cmath>

#include "convolution.h"
///////////////////////////////////////////////////////////////////////////////
// 1D convolution
// We assume input and kernel signal start from t=0.
///////////////////////////////////////////////////////////////////////////////
// Computes out[i] = sum over k of in[i-k] * kernel[k] for i = 0 .. dataSize-1.
// Input samples before index 0 are treated as 0 (they are simply skipped).
//   in, out : dataSize samples each
//   kernel  : kernelSize coefficients
// Returns false if a pointer is null or a size is not positive, true otherwise.
bool convolve1D(float* in, float* out, int dataSize, float* kernel, int kernelSize)
{
    int i, j, k;
    int partialEnd;                     // one past the last output that uses a partial kernel

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSize <= 0 || kernelSize <= 0) return false;

    // outputs out[kernelSize-1] .. out[dataSize-1]: the whole kernel overlaps
    // the input (this loop does not run when the kernel is longer than the data)
    for(i = kernelSize - 1; i < dataSize; ++i)
    {
        out[i] = 0;                     // init to 0 before accumulate
        for(j = i, k = 0; k < kernelSize; --j, ++k)
            out[i] += in[j] * kernel[k];
    }

    // outputs out[0] .. out[kernelSize-2]: only kernel[0..i] overlaps the input.
    // Stop at dataSize so a kernel longer than the data cannot run past the
    // end of in[] and out[].
    partialEnd = (kernelSize - 1 < dataSize) ? (kernelSize - 1) : dataSize;
    for(i = 0; i < partialEnd; ++i)
    {
        out[i] = 0;                     // init to 0 before sum
        for(j = i, k = 0; j >= 0; --j, ++k)
            out[i] += in[j] * kernel[k];
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// Simplest 2D convolution routine. It is easy to understand how convolution
// works, but is very slow, because of no optimization.
///////////////////////////////////////////////////////////////////////////////
// Reference 2D convolution of an 8-bit image, written for clarity, not speed.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernel  : kernelSizeX * kernelSizeY coefficients, stored row by row; its
//             origin is element (kernelSizeX/2, kernelSizeY/2)
// Input samples outside the image are ignored. Each output is |sum| rounded to
// the nearest integer and cast to unsigned char; the value is not clamped.
// Returns false if a pointer is null or any size is not positive.
bool convolve2DSlow(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                    float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n, mm, nn;
    int kCenterX, kCenterY;                         // center index of kernel
    float sum;                                      // temp accumulation buffer
    int rowIndex, colIndex;

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX / 2;
    kCenterY = kernelSizeY / 2;

    for(i = 0; i < dataSizeY; ++i)                  // rows
    {
        for(j = 0; j < dataSizeX; ++j)              // columns
        {
            sum = 0;                                // init to 0 before sum
            for(m = 0; m < kernelSizeY; ++m)        // kernel rows
            {
                mm = kernelSizeY - 1 - m;           // row index of flipped kernel

                for(n = 0; n < kernelSizeX; ++n)    // kernel columns
                {
                    nn = kernelSizeX - 1 - n;       // column index of flipped kernel

                    // input sample that kernel element (nn, mm) multiplies.
                    // Measuring the offset from the kernel center keeps the
                    // result identical for odd sizes and makes even sizes use
                    // the same origin (kCenter) as convolve2D().
                    rowIndex = i + (kCenterY - mm);
                    colIndex = j + (kCenterX - nn);

                    // ignore input samples which are out of bound
                    if(rowIndex >= 0 && rowIndex < dataSizeY && colIndex >= 0 && colIndex < dataSizeX)
                        sum += in[dataSizeX * rowIndex + colIndex] * kernel[kernelSizeX * mm + nn];
                }
            }
            // convert negative number to positive and round to nearest
            out[dataSizeX * i + j] = (unsigned char)((float)fabs(sum) + 0.5f);
        }
    }

    return true;
}

///////////////////////////////////////////////////////////////////////////////
// 2D convolution
// 2D data are usually stored in computer memory as contiguous 1D array.
// So, we are using 1D array for 2D data.
// 2D convolution assumes the kernel is center originated, which means, if
// kernel size 3 then, k[-1], k[0], k[1]. The middle of index is always 0.
// The following programming logics are somewhat complicated because of using
// pointer indexing in order to minimize the number of multiplications.
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unsigned char version (8bit): Note that the output is always positive number
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of an 8-bit image.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernel  : kernelSizeX * kernelSizeY coefficients, stored row by row; its
//             origin is element (kernelSizeX/2, kernelSizeY/2)
// Input samples outside the image are skipped. Each output is |sum| rounded to
// the nearest integer and cast to unsigned char; the value is not clamped.
// Returns false if a pointer is null or any size is not positive.
bool convolve2D(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                 // kernel origin (half of kernel size)
    int row, col;                           // input sample under kernel element (n, m)
    const unsigned char *inRow;             // first sample of input row `row`
    const float *kRow;                      // first coefficient of kernel row m
    unsigned char *outPtr;                  // next output sample to write
    float sum;                              // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    // start convolution
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        for(j = 0; j < dataSizeX; ++j)              // number of columns
        {
            sum = 0;                                // set to 0 before accumulate

            // kernel element (n, m) multiplies input (j + kCenterX - n, i + kCenterY - m),
            // i.e. the kernel is traversed forward while the input is walked backward.
            // Addresses are only formed for rows/columns inside the image.
            for(m = 0; m < kernelSizeY; ++m)        // kernel rows
            {
                row = i + kCenterY - m;
                if(row < 0 || row >= dataSizeY)
                    continue;                       // out of bound, move to next row of kernel

                inRow = in + dataSizeX * row;
                kRow = kernel + kernelSizeX * m;
                for(n = 0; n < kernelSizeX; ++n)    // kernel columns
                {
                    col = j + kCenterX - n;
                    if(col >= 0 && col < dataSizeX) // check the boundary of array
                        sum += inRow[col] * kRow[n];
                }
            }

            // convert negative number to positive and round to nearest
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;                               // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// unsigned short (16bit)
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of a 16-bit image.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernel  : kernelSizeX * kernelSizeY coefficients, stored row by row; its
//             origin is element (kernelSizeX/2, kernelSizeY/2)
// Input samples outside the image are skipped. Each output is |sum| rounded to
// the nearest integer and cast to unsigned short; the value is not clamped.
// Returns false if a pointer is null or any size is not positive.
bool convolve2D(unsigned short* in, unsigned short* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                 // kernel origin (half of kernel size)
    int row, col;                           // input sample under kernel element (n, m)
    const unsigned short *inRow;            // first sample of input row `row`
    const float *kRow;                      // first coefficient of kernel row m
    unsigned short *outPtr;                 // next output sample to write
    float sum;                              // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    // start convolution
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        for(j = 0; j < dataSizeX; ++j)              // number of columns
        {
            sum = 0;                                // set to 0 before accumulate

            // kernel element (n, m) multiplies input (j + kCenterX - n, i + kCenterY - m),
            // i.e. the kernel is traversed forward while the input is walked backward.
            // Addresses are only formed for rows/columns inside the image.
            for(m = 0; m < kernelSizeY; ++m)        // kernel rows
            {
                row = i + kCenterY - m;
                if(row < 0 || row >= dataSizeY)
                    continue;                       // out of bound, move to next row of kernel

                inRow = in + dataSizeX * row;
                kRow = kernel + kernelSizeX * m;
                for(n = 0; n < kernelSizeX; ++n)    // kernel columns
                {
                    col = j + kCenterX - n;
                    if(col >= 0 && col < dataSizeX) // check the boundary of array
                        sum += inRow[col] * kRow[n];
                }
            }

            // convert negative number to positive and round to nearest
            *outPtr = (unsigned short)((float)fabs(sum) + 0.5f);
            ++outPtr;                               // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// signed integer (32bit) version:
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of a signed 32-bit integer image.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernel  : kernelSizeX * kernelSizeY coefficients, stored row by row; its
//             origin is element (kernelSizeX/2, kernelSizeY/2)
// Input samples outside the image are skipped. The sign is kept: each output
// is the float sum rounded half away from zero and cast to int.
// Returns false if a pointer is null or any size is not positive.
bool convolve2D(int* in, int* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                 // kernel origin (half of kernel size)
    int row, col;                           // input sample under kernel element (n, m)
    const int *inRow;                       // first sample of input row `row`
    const float *kRow;                      // first coefficient of kernel row m
    int *outPtr;                            // next output sample to write
    float sum;                              // temp accumulation buffer

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    // start convolution
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        for(j = 0; j < dataSizeX; ++j)              // number of columns
        {
            sum = 0;                                // set to 0 before accumulate

            // kernel element (n, m) multiplies input (j + kCenterX - n, i + kCenterY - m),
            // i.e. the kernel is traversed forward while the input is walked backward.
            // Addresses are only formed for rows/columns inside the image.
            for(m = 0; m < kernelSizeY; ++m)        // kernel rows
            {
                row = i + kCenterY - m;
                if(row < 0 || row >= dataSizeY)
                    continue;                       // out of bound, move to next row of kernel

                inRow = in + dataSizeX * row;
                kRow = kernel + kernelSizeX * m;
                for(n = 0; n < kernelSizeX; ++n)    // kernel columns
                {
                    col = j + kCenterX - n;
                    if(col >= 0 && col < dataSizeX) // check the boundary of array
                        sum += inRow[col] * kRow[n];
                }
            }

            // convert to integer, rounding half away from zero
            if(sum >= 0) *outPtr = (int)(sum + 0.5f);
            else         *outPtr = (int)(sum - 0.5f);
            ++outPtr;                               // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// single float precision version:
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of a single-precision float image.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernel  : kernelSizeX * kernelSizeY coefficients, stored row by row; its
//             origin is element (kernelSizeX/2, kernelSizeY/2)
// Input samples outside the image are skipped. The sum is accumulated directly
// in the output sample, without rounding or sign change.
// Returns false if a pointer is null or any size is not positive.
bool convolve2D(float* in, float* out, int dataSizeX, int dataSizeY,
                float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                 // kernel origin (half of kernel size)
    int row, col;                           // input sample under kernel element (n, m)
    const float *inRow;                     // first sample of input row `row`
    const float *kRow;                      // first coefficient of kernel row m
    float *outPtr;                          // output sample being accumulated

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    // start convolution
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        for(j = 0; j < dataSizeX; ++j)              // number of columns
        {
            *outPtr = 0;                            // set to 0 before accumulate

            // kernel element (n, m) multiplies input (j + kCenterX - n, i + kCenterY - m),
            // i.e. the kernel is traversed forward while the input is walked backward.
            // Addresses are only formed for rows/columns inside the image.
            for(m = 0; m < kernelSizeY; ++m)        // kernel rows
            {
                row = i + kCenterY - m;
                if(row < 0 || row >= dataSizeY)
                    continue;                       // out of bound, move to next row of kernel

                inRow = in + dataSizeX * row;
                kRow = kernel + kernelSizeX * m;
                for(n = 0; n < kernelSizeX; ++n)    // kernel columns
                {
                    col = j + kCenterX - n;
                    if(col >= 0 && col < dataSizeX) // check the boundary of array
                        *outPtr += inRow[col] * kRow[n];
                }
            }

            ++outPtr;                               // next output
        }
    }

    return true;
}
///////////////////////////////////////////////////////////////////////////////
// double float precision version:
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of a double-precision float image with a double kernel.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernel  : kernelSizeX * kernelSizeY coefficients, stored row by row; its
//             origin is element (kernelSizeX/2, kernelSizeY/2)
// Input samples outside the image are skipped. The sum is accumulated directly
// in the output sample, without rounding or sign change.
// Returns false if a pointer is null or any size is not positive.
bool convolve2D(double* in, double* out, int dataSizeX, int dataSizeY,
                double* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n;
    int kCenterX, kCenterY;                 // kernel origin (half of kernel size)
    int row, col;                           // input sample under kernel element (n, m)
    const double *inRow;                    // first sample of input row `row`
    const double *kRow;                     // first coefficient of kernel row m
    double *outPtr;                         // output sample being accumulated

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;

    outPtr = out;

    // start convolution
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        for(j = 0; j < dataSizeX; ++j)              // number of columns
        {
            *outPtr = 0;                            // set to 0 before accumulate

            // kernel element (n, m) multiplies input (j + kCenterX - n, i + kCenterY - m),
            // i.e. the kernel is traversed forward while the input is walked backward.
            // Addresses are only formed for rows/columns inside the image.
            for(m = 0; m < kernelSizeY; ++m)        // kernel rows
            {
                row = i + kCenterY - m;
                if(row < 0 || row >= dataSizeY)
                    continue;                       // out of bound, move to next row of kernel

                inRow = in + dataSizeX * row;
                kRow = kernel + kernelSizeX * m;
                for(n = 0; n < kernelSizeX; ++n)    // kernel columns
                {
                    col = j + kCenterX - n;
                    if(col >= 0 && col < dataSizeX) // check the boundary of array
                        *outPtr += inRow[col] * kRow[n];
                }
            }

            ++outPtr;                               // next output
        }
    }

    return true;
}

///////////////////////////////////////////////////////////////////////////////
// Separable 2D Convolution
// If the MxN kernel can be separated into (Mx1) and (1xN) matrices, the number
// of multiplications per sample is reduced to M+N, compared to MxN in normal
// convolution.
// For performance reasons, it does not check whether the output exceeds the
// maximum of the output type. We assume the kernel contains good (valid) data,
// so the result cannot be larger than that maximum.
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unsigned char (8-bit) version
///////////////////////////////////////////////////////////////////////////////
// Separable 2D convolution of an 8-bit image: every row is convolved with
// kernelX into a float scratch image, then every column of the scratch image
// is convolved with kernelY.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernelX : kSizeX horizontal coefficients, origin at index kSizeX/2
//   kernelY : kSizeY vertical coefficients, origin at index kSizeY/2
// Near the borders only the part of the kernel that overlaps the image is used.
// Each output is |sum| rounded to the nearest integer and cast to unsigned
// char; the value is not clamped.
// Returns false if a pointer is null, a size is not positive, the image is
// narrower/shorter than 2*(kSize/2) (the two border regions would overlap), or
// the scratch buffers cannot be allocated.
// NOTE: the full-kernel pass lines up with the border passes only for odd
// kernel sizes.
bool convolve2DSeparable(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k, m, n;
    float *tmp = 0, *sum = 0;                       // intermediate data buffers
    unsigned char *inPtr, *outPtr;                  // working pointers
    float *tmpPtr, *tmpPtr2;                        // working pointers
    int kCenter, kOffset, endIndex;                 // kernel indices

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // the border passes below read and write 2*(kSize/2) samples per row/column,
    // so a smaller image would be overrun
    if(dataSizeX < 2 * (kSizeX >> 1) || dataSizeY < 2 * (kSizeY >> 1)) return false;

    // allocate temp storage: tmp keeps the horizontal result, sum accumulates
    // one output row. Plain new[] reports failure by throwing, so catch it to
    // return false without leaking tmp.
    try
    {
        tmp = new float[dataSizeX * dataSizeY];
        sum = new float[dataSizeX];
    }
    catch(...)
    {
        delete [] tmp;
        return false;                               // memory allocation error
    }

    // convolve horizontal direction //////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeX >> 1;                          // center index of kernel array
    endIndex = dataSizeX - kCenter;                 // index where full kernel convolution should stop

    // init working pointers
    inPtr = in;
    tmpPtr = tmp;                                   // store intermediate results from 1D horizontal convolution

    // start horizontal convolution (x-direction)
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        kOffset = 0;                                // starting index of partial kernel varies for each sample

        // COLUMN FROM index=0 TO index=kCenter-1
        for(j = 0; j < kCenter; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulation

            for(k = kCenter + kOffset, m = 0; k >= 0; --k, ++m) // convolve with partial of kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++tmpPtr;                               // next output
            ++kOffset;                              // increase starting index of kernel
        }

        // COLUMN FROM index=kCenter TO index=(dataSizeX-kCenter-1)
        for(j = kCenter; j < endIndex; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulate

            for(k = kSizeX - 1, m = 0; k >= 0; --k, ++m)        // full kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                                // next input
            ++tmpPtr;                               // next output
        }

        kOffset = 1;                                // ending index of partial kernel varies for each sample

        // COLUMN FROM index=(dataSizeX-kCenter) TO index=(dataSizeX-1)
        for(j = endIndex; j < dataSizeX; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulation

            for(k = kSizeX - 1, m = 0; k >= kOffset; --k, ++m)  // convolve with partial of kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                                // next input
            ++tmpPtr;                               // next output
            ++kOffset;                              // increase ending index of partial kernel
        }

        inPtr += kCenter;                           // next row
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeY >> 1;                          // center index of vertical kernel
    endIndex = dataSizeY - kCenter;                 // index where full kernel convolution should stop

    // set working pointers
    tmpPtr = tmpPtr2 = tmp;
    outPtr = out;

    // clear out array before accumulation
    for(i = 0; i < dataSizeX; ++i)
        sum[i] = 0;

    // start to convolve vertical direction (y-direction)

    // ROW FROM index=0 TO index=(kCenter-1)
    kOffset = 0;                                    // starting index of partial kernel varies for each sample
    for(i = 0; i < kCenter; ++i)
    {
        for(k = kCenter + kOffset; k >= 0; --k)     // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // convert negative to positive
            *outPtr = (unsigned char)((float)fabs(sum[n]) + 0.5f);
            sum[n] = 0;                             // reset to zero for next summing
            ++outPtr;                               // next element of output
        }

        tmpPtr = tmpPtr2;                           // reset input pointer
        ++kOffset;                                  // increase starting index of kernel
    }

    // ROW FROM index=kCenter TO index=(dataSizeY-kCenter-1)
    for(i = kCenter; i < endIndex; ++i)
    {
        for(k = kSizeY - 1; k >= 0; --k)            // convolve with full kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // convert negative to positive
            *outPtr = (unsigned char)((float)fabs(sum[n]) + 0.5f);
            sum[n] = 0;                             // reset for next summing
            ++outPtr;                               // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;
    }

    // ROW FROM index=(dataSizeY-kCenter) TO index=(dataSizeY-1)
    kOffset = 1;                                    // ending index of partial kernel varies for each sample
    for(i = endIndex; i < dataSizeY; ++i)
    {
        for(k = kSizeY - 1; k >= kOffset; --k)      // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // convert negative to positive
            *outPtr = (unsigned char)((float)fabs(sum[n]) + 0.5f);
            sum[n] = 0;                             // reset for next summing
            ++outPtr;                               // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;                           // next input
        ++kOffset;                                  // increase ending index of kernel
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}
///////////////////////////////////////////////////////////////////////////////
// unsigned short (16-bit) version
///////////////////////////////////////////////////////////////////////////////
// Separable 2D convolution of a 16-bit image: every row is convolved with
// kernelX into a float scratch image, then every column of the scratch image
// is convolved with kernelY.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernelX : kSizeX horizontal coefficients, origin at index kSizeX/2
//   kernelY : kSizeY vertical coefficients, origin at index kSizeY/2
// Near the borders only the part of the kernel that overlaps the image is used.
// Each output is |sum| rounded to the nearest integer and cast to unsigned
// short; the value is not clamped.
// Returns false if a pointer is null, a size is not positive, the image is
// narrower/shorter than 2*(kSize/2) (the two border regions would overlap), or
// the scratch buffers cannot be allocated.
// NOTE: the full-kernel pass lines up with the border passes only for odd
// kernel sizes.
bool convolve2DSeparable(unsigned short* in, unsigned short* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k, m, n;
    float *tmp = 0, *sum = 0;                       // intermediate data buffers
    unsigned short *inPtr, *outPtr;                 // working pointers
    float *tmpPtr, *tmpPtr2;                        // working pointers
    int kCenter, kOffset, endIndex;                 // kernel indices

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // the border passes below read and write 2*(kSize/2) samples per row/column,
    // so a smaller image would be overrun
    if(dataSizeX < 2 * (kSizeX >> 1) || dataSizeY < 2 * (kSizeY >> 1)) return false;

    // allocate temp storage: tmp keeps the horizontal result, sum accumulates
    // one output row. Plain new[] reports failure by throwing, so catch it to
    // return false without leaking tmp.
    try
    {
        tmp = new float[dataSizeX * dataSizeY];
        sum = new float[dataSizeX];
    }
    catch(...)
    {
        delete [] tmp;
        return false;                               // memory allocation error
    }

    // convolve horizontal direction //////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeX >> 1;                          // center index of kernel array
    endIndex = dataSizeX - kCenter;                 // index where full kernel convolution should stop

    // init working pointers
    inPtr = in;
    tmpPtr = tmp;                                   // store intermediate results from 1D horizontal convolution

    // start horizontal convolution (x-direction)
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        kOffset = 0;                                // starting index of partial kernel varies for each sample

        // COLUMN FROM index=0 TO index=kCenter-1
        for(j = 0; j < kCenter; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulation

            for(k = kCenter + kOffset, m = 0; k >= 0; --k, ++m) // convolve with partial of kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++tmpPtr;                               // next output
            ++kOffset;                              // increase starting index of kernel
        }

        // COLUMN FROM index=kCenter TO index=(dataSizeX-kCenter-1)
        for(j = kCenter; j < endIndex; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulate

            for(k = kSizeX - 1, m = 0; k >= 0; --k, ++m)        // full kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                                // next input
            ++tmpPtr;                               // next output
        }

        kOffset = 1;                                // ending index of partial kernel varies for each sample

        // COLUMN FROM index=(dataSizeX-kCenter) TO index=(dataSizeX-1)
        for(j = endIndex; j < dataSizeX; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulation

            for(k = kSizeX - 1, m = 0; k >= kOffset; --k, ++m)  // convolve with partial of kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                                // next input
            ++tmpPtr;                               // next output
            ++kOffset;                              // increase ending index of partial kernel
        }

        inPtr += kCenter;                           // next row
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeY >> 1;                          // center index of vertical kernel
    endIndex = dataSizeY - kCenter;                 // index where full kernel convolution should stop

    // set working pointers
    tmpPtr = tmpPtr2 = tmp;
    outPtr = out;

    // clear out array before accumulation
    for(i = 0; i < dataSizeX; ++i)
        sum[i] = 0;

    // start to convolve vertical direction (y-direction)

    // ROW FROM index=0 TO index=(kCenter-1)
    kOffset = 0;                                    // starting index of partial kernel varies for each sample
    for(i = 0; i < kCenter; ++i)
    {
        for(k = kCenter + kOffset; k >= 0; --k)     // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // convert negative to positive
            *outPtr = (unsigned short)((float)fabs(sum[n]) + 0.5f);
            sum[n] = 0;                             // reset to zero for next summing
            ++outPtr;                               // next element of output
        }

        tmpPtr = tmpPtr2;                           // reset input pointer
        ++kOffset;                                  // increase starting index of kernel
    }

    // ROW FROM index=kCenter TO index=(dataSizeY-kCenter-1)
    for(i = kCenter; i < endIndex; ++i)
    {
        for(k = kSizeY - 1; k >= 0; --k)            // convolve with full kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // convert negative to positive
            *outPtr = (unsigned short)((float)fabs(sum[n]) + 0.5f);
            sum[n] = 0;                             // reset before next summing
            ++outPtr;                               // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;
    }

    // ROW FROM index=(dataSizeY-kCenter) TO index=(dataSizeY-1)
    kOffset = 1;                                    // ending index of partial kernel varies for each sample
    for(i = endIndex; i < dataSizeY; ++i)
    {
        for(k = kSizeY - 1; k >= kOffset; --k)      // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // convert negative to positive
            *outPtr = (unsigned short)((float)fabs(sum[n]) + 0.5f);
            sum[n] = 0;                             // reset before next summing
            ++outPtr;                               // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;                           // next input
        ++kOffset;                                  // increase ending index of kernel
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// integer (32-bit) version
///////////////////////////////////////////////////////////////////////////////
// Separable 2D convolution of a signed 32-bit integer image: every row is
// convolved with kernelX into a float scratch image, then every column of the
// scratch image is convolved with kernelY.
//   in, out : dataSizeX * dataSizeY samples each, stored row by row
//   kernelX : kSizeX horizontal coefficients, origin at index kSizeX/2
//   kernelY : kSizeY vertical coefficients, origin at index kSizeY/2
// Near the borders only the part of the kernel that overlaps the image is used.
// The sign is kept: each output is the float sum rounded half away from zero
// and cast to int.
// Returns false if a pointer is null, a size is not positive, the image is
// narrower/shorter than 2*(kSize/2) (the two border regions would overlap), or
// the scratch buffers cannot be allocated.
// NOTE: the full-kernel pass lines up with the border passes only for odd
// kernel sizes.
bool convolve2DSeparable(int* in, int* out, int dataSizeX, int dataSizeY,
                         float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k, m, n;
    float *tmp = 0, *sum = 0;                       // intermediate data buffers
    int *inPtr, *outPtr;                            // working pointers
    float *tmpPtr, *tmpPtr2;                        // working pointers
    int kCenter, kOffset, endIndex;                 // kernel indices

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // the border passes below read and write 2*(kSize/2) samples per row/column,
    // so a smaller image would be overrun
    if(dataSizeX < 2 * (kSizeX >> 1) || dataSizeY < 2 * (kSizeY >> 1)) return false;

    // allocate temp storage: tmp keeps the horizontal result, sum accumulates
    // one output row. Plain new[] reports failure by throwing, so catch it to
    // return false without leaking tmp.
    try
    {
        tmp = new float[dataSizeX * dataSizeY];
        sum = new float[dataSizeX];
    }
    catch(...)
    {
        delete [] tmp;
        return false;                               // memory allocation error
    }

    // convolve horizontal direction //////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeX >> 1;                          // center index of kernel array
    endIndex = dataSizeX - kCenter;                 // index where full kernel convolution should stop

    // init working pointers
    inPtr = in;
    tmpPtr = tmp;                                   // store intermediate results from 1D horizontal convolution

    // start horizontal convolution (x-direction)
    for(i = 0; i < dataSizeY; ++i)                  // number of rows
    {
        kOffset = 0;                                // starting index of partial kernel varies for each sample

        // COLUMN FROM index=0 TO index=kCenter-1
        for(j = 0; j < kCenter; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulation

            for(k = kCenter + kOffset, m = 0; k >= 0; --k, ++m) // convolve with partial of kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++tmpPtr;                               // next output
            ++kOffset;                              // increase starting index of kernel
        }

        // COLUMN FROM index=kCenter TO index=(dataSizeX-kCenter-1)
        for(j = kCenter; j < endIndex; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulate

            for(k = kSizeX - 1, m = 0; k >= 0; --k, ++m)        // full kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                                // next input
            ++tmpPtr;                               // next output
        }

        kOffset = 1;                                // ending index of partial kernel varies for each sample

        // COLUMN FROM index=(dataSizeX-kCenter) TO index=(dataSizeX-1)
        for(j = endIndex; j < dataSizeX; ++j)
        {
            *tmpPtr = 0;                            // init to 0 before accumulation

            for(k = kSizeX - 1, m = 0; k >= kOffset; --k, ++m)  // convolve with partial of kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                                // next input
            ++tmpPtr;                               // next output
            ++kOffset;                              // increase ending index of partial kernel
        }

        inPtr += kCenter;                           // next row
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeY >> 1;                          // center index of vertical kernel
    endIndex = dataSizeY - kCenter;                 // index where full kernel convolution should stop

    // set working pointers
    tmpPtr = tmpPtr2 = tmp;
    outPtr = out;

    // clear out array before accumulation
    for(i = 0; i < dataSizeX; ++i)
        sum[i] = 0;

    // start to convolve vertical direction (y-direction)

    // ROW FROM index=0 TO index=(kCenter-1)
    kOffset = 0;                                    // starting index of partial kernel varies for each sample
    for(i = 0; i < kCenter; ++i)
    {
        for(k = kCenter + kOffset; k >= 0; --k)     // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // store final result to output array, rounding half away from zero
            if(sum[n] >= 0)
                *outPtr = (int)(sum[n] + 0.5f);
            else
                *outPtr = (int)(sum[n] - 0.5f);
            sum[n] = 0;                             // reset to zero for next summing
            ++outPtr;                               // next element of output
        }

        tmpPtr = tmpPtr2;                           // reset input pointer
        ++kOffset;                                  // increase starting index of kernel
    }

    // ROW FROM index=kCenter TO index=(dataSizeY-kCenter-1)
    for(i = kCenter; i < endIndex; ++i)
    {
        for(k = kSizeY - 1; k >= 0; --k)            // convolve with full kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // store final result to output array, rounding half away from zero
            if(sum[n] >= 0)
                *outPtr = (int)(sum[n] + 0.5f);
            else
                *outPtr = (int)(sum[n] - 0.5f);
            sum[n] = 0;                             // reset to 0 before next summing
            ++outPtr;                               // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;
    }

    // ROW FROM index=(dataSizeY-kCenter) TO index=(dataSizeY-1)
    kOffset = 1;                                    // ending index of partial kernel varies for each sample
    for(i = endIndex; i < dataSizeY; ++i)
    {
        for(k = kSizeY - 1; k >= kOffset; --k)      // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // convert and copy from sum to out
        {
            // store final result to output array, rounding half away from zero
            if(sum[n] >= 0)
                *outPtr = (int)(sum[n] + 0.5f);
            else
                *outPtr = (int)(sum[n] - 0.5f);
            sum[n] = 0;                             // reset before next summing
            ++outPtr;                               // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;                           // next input
        ++kOffset;                                  // increase ending index of kernel
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// single precision float version
///////////////////////////////////////////////////////////////////////////////
bool convolve2DSeparable(float* in, float* out, int dataSizeX, int dataSizeY,
float* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
int i, j, k, m, n;
float *tmp, *sum;
// intermediate data buffer
float *inPtr, *outPtr;
// working pointers
float *tmpPtr, *tmpPtr2;
// working pointers
int kCenter, kOffset, endIndex;
// kernel indice
// check validity of params
if(!in || !out || !kernelX || !kernelY) return false;
if(dataSizeX <= 0 || kSizeX <= 0) return false;
// allocate temp storage to keep intermediate result
tmp = new float[dataSizeX * dataSizeY];
if(!tmp) return false; // memory allocation error
// store accumulated sum
sum = new float[dataSizeX];
if(!sum) return false; // memory allocation error
// covolve horizontal direction ///////////////////////
// find center position of kernel (half of kernel size)
kCenter = kSizeX >> 1;
// center index of kernel ar
ray
endIndex = dataSizeX - kCenter;
volution
// init working pointers

// index for full kernel con

inPtr = in;
tmpPtr = tmp;
s from 1D horizontal convolution
// start horizontal convolution (x-direction)
for(i=0; i < dataSizeY; ++i)
{
kOffset = 0;
kernel varies for each sample
// COLUMN FROM index=0 TO index=kCenter-1
for(j=0; j < kCenter; ++j)
{
*tmpPtr = 0;

// store intermediate result

// number of rows
// starting index of partial

// init to 0 before accumula

tion
for(k = kCenter + kOffset, m = 0; k >= 0; --k, ++m) // convolve with
partial of kernel
{
*tmpPtr += *(inPtr + m) * kernelX[k];
}
++tmpPtr;
// next output
++kOffset;
// increase starting index o
f kernel
}
// COLUMN FROM index=kCenter TO index=(dataSizeX-kCenter-1)
for(j = kCenter; j < endIndex; ++j)
{
*tmpPtr = 0;
// init to 0 before accumula
te
for(k = kSizeX-1, m = 0; k >= 0; --k, ++m) // full kernel
{
*tmpPtr += *(inPtr + m) * kernelX[k];
}
++inPtr;
// next input
++tmpPtr;
// next output
}
kOffset = 1;
ernel varies for each sample

// ending index of partial k

// COLUMN FROM index=(dataSizeX-kCenter) TO index=(dataSizeX-1)


for(j = endIndex; j < dataSizeX; ++j)
{
*tmpPtr = 0;
// init to 0 before accumula
tion
for(k = kSizeX-1, m=0; k >= kOffset; --k, ++m) // convolve with pa
rtial of kernel
{
*tmpPtr += *(inPtr + m) * kernelX[k];
}
++inPtr;
// next input
++tmpPtr;
// next output
++kOffset;
// increase ending index of
partial kernel
}

inPtr += kCenter;
// next row
}
// END OF HORIZONTAL CONVOLUTION //////////////////////
// start vertical direction ///////////////////////////
// find center position of kernel (half of kernel size)
kCenter = kSizeY >> 1;
// center index of vertical
kernel
endIndex = dataSizeY - kCenter;
// index where full kernel c
onvolution should stop
// set working pointers
tmpPtr = tmpPtr2 = tmp;
outPtr = out;
// clear out array before accumulation
for(i = 0; i < dataSizeX; ++i)
sum[i] = 0;
// start to convolve vertical direction (y-direction)
// ROW FROM index=0 TO index=(kCenter-1)
kOffset = 0;
kernel varies for each sample
for(i=0; i < kCenter; ++i)
{
for(k = kCenter + kOffset; k >= 0; --k)
nel
{
for(j=0; j < dataSizeX; ++j)
{
sum[j] += *tmpPtr * kernelY[k];
++tmpPtr;
}
}
for(n = 0; n < dataSizeX; ++n)

// starting index of partial

// convolve with partial ker

// convert and copy from sum

to out
{
*outPtr = sum[n];

// store final result to out

sum[n] = 0;

// reset to zero for next su

++outPtr;

// next element of output

put array
mming
}
tmpPtr = tmpPtr2;
++kOffset;

// reset input pointer


// increase starting index o

f kernel
}
// ROW FROM index=kCenter TO index=(dataSizeY-kCenter-1)
for(i = kCenter; i < endIndex; ++i)
{
for(k = kSizeY -1; k >= 0; --k)
// convolve with full kernel
{
for(j = 0; j < dataSizeX; ++j)
{

sum[j] += *tmpPtr * kernelY[k];


++tmpPtr;
}
}
for(n = 0; n < dataSizeX; ++n)

// convert and copy from sum

to out
{
*outPtr = sum[n];

// store final result to out

sum[n] = 0;
++outPtr;

// reset before next summing


// next output

put buffer
}
// move to next row
tmpPtr2 += dataSizeX;
tmpPtr = tmpPtr2;
}
// ROW FROM index=(dataSizeY-kCenter) TO index=(dataSizeY-1)
kOffset = 1;
// ending index of partial k
ernel varies for each sample
for(i=endIndex; i < dataSizeY; ++i)
{
for(k = kSizeY-1; k >= kOffset; --k)
// convolve with partial ker
nel
{
for(j=0; j < dataSizeX; ++j)
{
sum[j] += *tmpPtr * kernelY[k];
++tmpPtr;
}
}
for(n = 0; n < dataSizeX; ++n)

// convert and copy from sum

to out
{
*outPtr = sum[n];

// store final result to out

sum[n] = 0;
++outPtr;

// reset to 0 for next sum


// next output

put array
}
// move to next row
tmpPtr2 += dataSizeX;
tmpPtr = tmpPtr2;
++kOffset;

// next input
// increase ending index of

kernel
}
// END OF VERTICAL CONVOLUTION ////////////////////////
// deallocate temp buffers
delete [] tmp;
delete [] sum;
return true;
}
///////////////////////////////////////////////////////////////////////////////
// double precision float version

///////////////////////////////////////////////////////////////////////////////
// Separable 2D convolution, double precision.
// The image is first convolved row by row with kernelX into a temporary
// buffer, then column by column with kernelY into out. Kernel taps that fall
// outside the image are skipped, i.e. the border is treated as zero.
//
//   in, out      row-major buffers of dataSizeX * dataSizeY samples
//   kernelX      horizontal kernel with kSizeX taps
//   kernelY      vertical kernel with kSizeY taps
//
// Returns false on a null pointer, a non-positive size, a kernel too large
// for the data, or an allocation failure reported as a null pointer;
// returns true otherwise.
//
// NOTE(review): kernelY is float* while every other buffer is double*. It is
// kept as-is so existing callers still link -- confirm against convolution.h.
bool convolve2DSeparable(double* in, double* out, int dataSizeX, int dataSizeY,
                         double* kernelX, int kSizeX, float* kernelY, int kSizeY)
{
    int i, j, k, m, n;
    double *tmp, *sum;                  // intermediate data buffers
    double *inPtr, *outPtr;             // working pointers
    double *tmpPtr, *tmpPtr2;           // working pointers
    int kCenter, kOffset, endIndex;     // kernel indices

    // check validity of params
    if(!in || !out || !kernelX || !kernelY) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kSizeX <= 0 || kSizeY <= 0) return false;

    // The border loops below emit kCenter samples at each edge of every row
    // and column. If the data is narrower than that, they would write past
    // the end of tmp/out, so reject such input up front.
    if(dataSizeX < 2 * (kSizeX >> 1) || dataSizeY < 2 * (kSizeY >> 1))
        return false;

    // allocate temp storage to keep intermediate result
    tmp = new double[dataSizeX * dataSizeY];
    if(!tmp) return false;              // memory allocation error

    // one row of accumulated sums for the vertical pass
    sum = new double[dataSizeX];
    if(!sum)
    {
        delete [] tmp;                  // do not leak the first buffer
        return false;                   // memory allocation error
    }

    // convolve horizontal direction ///////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeX >> 1;              // center index of kernel array
    endIndex = dataSizeX - kCenter;     // index where full kernel convolution stops

    // init working pointers
    inPtr = in;
    tmpPtr = tmp;                       // stores results of the horizontal pass

    // start horizontal convolution (x-direction)
    for(i = 0; i < dataSizeY; ++i)      // number of rows
    {
        kOffset = 0;                    // starting index of partial kernel varies for each sample

        // COLUMN FROM index=0 TO index=kCenter-1
        for(j = 0; j < kCenter; ++j)
        {
            *tmpPtr = 0;                // init to 0 before accumulation

            for(k = kCenter + kOffset, m = 0; k >= 0; --k, ++m) // partial kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++tmpPtr;                   // next output
            ++kOffset;                  // increase starting index of kernel
        }

        // COLUMN FROM index=kCenter TO index=(dataSizeX-kCenter-1)
        for(j = kCenter; j < endIndex; ++j)
        {
            *tmpPtr = 0;                // init to 0 before accumulation

            for(k = kSizeX - 1, m = 0; k >= 0; --k, ++m)        // full kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                    // next input
            ++tmpPtr;                   // next output
        }

        kOffset = 1;                    // ending index of partial kernel varies for each sample

        // COLUMN FROM index=(dataSizeX-kCenter) TO index=(dataSizeX-1)
        for(j = endIndex; j < dataSizeX; ++j)
        {
            *tmpPtr = 0;                // init to 0 before accumulation

            for(k = kSizeX - 1, m = 0; k >= kOffset; --k, ++m)  // partial kernel
            {
                *tmpPtr += *(inPtr + m) * kernelX[k];
            }
            ++inPtr;                    // next input
            ++tmpPtr;                   // next output
            ++kOffset;                  // increase ending index of partial kernel
        }

        inPtr += kCenter;               // next row
    }
    // END OF HORIZONTAL CONVOLUTION //////////////////////

    // start vertical direction ///////////////////////////

    // find center position of kernel (half of kernel size)
    kCenter = kSizeY >> 1;              // center index of vertical kernel
    endIndex = dataSizeY - kCenter;     // index where full kernel convolution should stop

    // set working pointers
    tmpPtr = tmpPtr2 = tmp;
    outPtr = out;

    // clear sum array before accumulation
    for(i = 0; i < dataSizeX; ++i)
        sum[i] = 0;

    // start to convolve vertical direction (y-direction)

    // ROW FROM index=0 TO index=(kCenter-1)
    kOffset = 0;                        // starting index of partial kernel varies for each sample
    for(i = 0; i < kCenter; ++i)
    {
        for(k = kCenter + kOffset; k >= 0; --k)     // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // copy from sum to out
        {
            *outPtr = sum[n];           // store final result to output array
            sum[n] = 0;                 // reset to zero for next summing
            ++outPtr;                   // next element of output
        }

        tmpPtr = tmpPtr2;               // reset input pointer
        ++kOffset;                      // increase starting index of kernel
    }

    // ROW FROM index=kCenter TO index=(dataSizeY-kCenter-1)
    for(i = kCenter; i < endIndex; ++i)
    {
        for(k = kSizeY - 1; k >= 0; --k)            // convolve with full kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // copy from sum to out
        {
            *outPtr = sum[n];           // store final result to output array
            sum[n] = 0;                 // reset to zero for next summing
            ++outPtr;                   // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;
    }

    // ROW FROM index=(dataSizeY-kCenter) TO index=(dataSizeY-1)
    kOffset = 1;                        // ending index of partial kernel varies for each sample
    for(i = endIndex; i < dataSizeY; ++i)
    {
        for(k = kSizeY - 1; k >= kOffset; --k)      // convolve with partial kernel
        {
            for(j = 0; j < dataSizeX; ++j)
            {
                sum[j] += *tmpPtr * kernelY[k];
                ++tmpPtr;
            }
        }

        for(n = 0; n < dataSizeX; ++n)              // copy from sum to out
        {
            *outPtr = sum[n];           // store final result to output array
            sum[n] = 0;                 // reset to zero for next summing
            ++outPtr;                   // next output
        }

        // move to next row
        tmpPtr2 += dataSizeX;
        tmpPtr = tmpPtr2;               // next input
        ++kOffset;                      // increase ending index of kernel
    }
    // END OF VERTICAL CONVOLUTION ////////////////////////

    // deallocate temp buffers
    delete [] tmp;
    delete [] sum;

    return true;
}

///////////////////////////////////////////////////////////////////////////////
// 2D Convolution Fast
// In order to improve the performance, this function uses multiple cursors
// into the input signal. It avoids indexing the input array during convolution.
// The input signal is also partitioned into 9 different sections, so we don't
// need to check the boundary for every sample.
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// unsigned char (8-bit) version
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of an 8-bit image with a float kernel.
// One input cursor is kept per kernel tap (kernelSizeX * kernelSizeY of
// them), laid out in flipped order so that cursor t pairs with kernel[t].
// The image is split into 9 partitions (3 row bands x 3 column bands); in
// the border partitions only the kernel taps that land inside the image are
// accumulated, so no per-sample boundary test is needed.
//
//   in, out      row-major buffers of dataSizeX * dataSizeY bytes
//   kernel       row-major kernel of kernelSizeX * kernelSizeY floats
//
// Each output is fabs(sum) + 0.5 converted to unsigned char; the value is
// not clamped to 255.
//
// Returns false on a null pointer, a non-positive size, a kernel too large
// for the image, or an allocation failure reported as a null pointer;
// returns true otherwise.
bool convolve2DFast(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                    float* kernel, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n, x, y, t;
    unsigned char **inPtr, *outPtr, *ptr;
    int kCenterX, kCenterY;
    int rowEnd, colEnd;                 // ending indices for section dividers
    float sum;                          // temp accumulation buffer
    int k, kSize;

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;
    kSize = kernelSizeX * kernelSizeY;  // total kernel size

    // The border partitions emit kCenterX columns / kCenterY rows at each
    // edge. A smaller image would make them write past the end of out.
    if(dataSizeX < 2 * kCenterX || dataSizeY < 2 * kCenterY) return false;

    // allocate memory for multi-cursor
    inPtr = new unsigned char*[kSize];
    if(!inPtr) return false;            // allocation error

    // set initial position of multi-cursor, NOTE: it is flipped instead of the kernel
    ptr = in + (dataSizeX * kCenterY + kCenterX);   // the first cursor is shifted (kCenterX, kCenterY)
    for(m = 0, t = 0; m < kernelSizeY; ++m)
    {
        for(n = 0; n < kernelSizeX; ++n, ++t)
        {
            inPtr[t] = ptr - n;
        }
        ptr -= dataSizeX;
    }

    // init working pointers
    outPtr = out;

    rowEnd = dataSizeY - kCenterY;      // bottom row partition divider
    colEnd = dataSizeX - kCenterX;      // right column partition divider

    // convolve rows from index=0 to index=kCenterY-1
    y = kCenterY;
    for(i = 0; i < kCenterY; ++i)
    {
        // partition #1 ***********************************
        x = kCenterX;
        for(j = 0; j < kCenterX; ++j)   // column from index=0 to index=kCenterX-1
        {
            sum = 0;
            t = 0;
            for(m = 0; m <= y; ++m)
            {
                for(n = 0; n <= x; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += (kernelSizeX - x - 1);     // jump to next row
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #2 ***********************************
        for(j = kCenterX; j < colEnd; ++j)  // column from index=kCenterX to index=(dataSizeX-kCenterX-1)
        {
            sum = 0;
            t = 0;
            for(m = 0; m <= y; ++m)
            {
                for(n = 0; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #3 ***********************************
        x = 1;
        for(j = colEnd; j < dataSizeX; ++j) // column from index=(dataSizeX-kCenterX) to index=(dataSizeX-1)
        {
            sum = 0;
            t = x;
            for(m = 0; m <= y; ++m)
            {
                for(n = x; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += x;                 // jump to next row
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        ++y;                            // add one more row to convolve for next run
    }

    // convolve rows from index=kCenterY to index=(dataSizeY-kCenterY-1)
    for(i = kCenterY; i < rowEnd; ++i)  // number of rows
    {
        // partition #4 ***********************************
        x = kCenterX;
        for(j = 0; j < kCenterX; ++j)   // column from index=0 to index=kCenterX-1
        {
            sum = 0;
            t = 0;
            for(m = 0; m < kernelSizeY; ++m)
            {
                for(n = 0; n <= x; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += (kernelSizeX - x - 1);     // jump to next row
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #5 ***********************************
        for(j = kCenterX; j < colEnd; ++j)  // column from index=kCenterX to index=(dataSizeX-kCenterX-1)
        {
            sum = 0;
            t = 0;
            for(m = 0; m < kernelSizeY; ++m)
            {
                for(n = 0; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++inPtr[t];         // every cursor is used here, so advance it in place
                    ++t;
                }
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
        }

        // partition #6 ***********************************
        x = 1;
        for(j = colEnd; j < dataSizeX; ++j) // column from index=(dataSizeX-kCenterX) to index=(dataSizeX-1)
        {
            sum = 0;
            t = x;
            for(m = 0; m < kernelSizeY; ++m)
            {
                for(n = x; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += x;                 // jump to next row
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }
    }

    // convolve rows from index=(dataSizeY-kCenterY) to index=(dataSizeY-1)
    y = 1;
    for(i = rowEnd; i < dataSizeY; ++i) // number of rows
    {
        // partition #7 ***********************************
        x = kCenterX;
        for(j = 0; j < kCenterX; ++j)   // column from index=0 to index=kCenterX-1
        {
            sum = 0;
            t = kernelSizeX * y;
            for(m = y; m < kernelSizeY; ++m)
            {
                for(n = 0; n <= x; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += (kernelSizeX - x - 1);     // jump to next row
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #8 ***********************************
        for(j = kCenterX; j < colEnd; ++j)  // column from index=kCenterX to index=(dataSizeX-kCenterX-1)
        {
            sum = 0;
            t = kernelSizeX * y;
            for(m = y; m < kernelSizeY; ++m)
            {
                for(n = 0; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #9 ***********************************
        x = 1;
        for(j = colEnd; j < dataSizeX; ++j) // column from index=(dataSizeX-kCenterX) to index=(dataSizeX-1)
        {
            sum = 0;
            t = kernelSizeX * y + x;
            for(m = y; m < kernelSizeY; ++m)
            {
                for(n = x; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += x;                 // jump to next row
            }

            // store output
            *outPtr = (unsigned char)((float)fabs(sum) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        ++y;                            // the starting row index is increased
    }

    // release the cursor table
    delete [] inPtr;

    return true;
}

///////////////////////////////////////////////////////////////////////////////
// Fast 2D Convolution using integer multiplication instead of float.
// Multiply coefficient(factor) to accumulated sum at last.
// NOTE: IT IS NOT FASTER THAN FLOAT MULTIPLICATION, TRY YOURSELF!!!
///////////////////////////////////////////////////////////////////////////////
// 2D convolution of an 8-bit image with an integer kernel.
// Products are accumulated in an int and the sum is multiplied by `factor`
// only once per output sample. Apart from the arithmetic type, the layout is
// a multi-cursor scheme: one input cursor per kernel tap, stored in flipped
// order so that cursor t pairs with kernel[t], and the image is split into
// 9 partitions so border samples only accumulate the taps inside the image.
//
//   in, out      row-major buffers of dataSizeX * dataSizeY bytes
//   kernel       row-major kernel of kernelSizeX * kernelSizeY ints
//   factor       scale applied to the accumulated integer sum
//
// Each output is fabs(sum * factor) + 0.5 converted to unsigned char; the
// value is not clamped to 255.
//
// Returns false on a null pointer, a non-positive size, a kernel too large
// for the image, or an allocation failure reported as a null pointer;
// returns true otherwise.
bool convolve2DFast2(unsigned char* in, unsigned char* out, int dataSizeX, int dataSizeY,
                     int* kernel, float factor, int kernelSizeX, int kernelSizeY)
{
    int i, j, m, n, x, y, t;
    unsigned char **inPtr, *outPtr, *ptr;
    int kCenterX, kCenterY;
    int rowEnd, colEnd;                 // ending indices for section dividers
    int sum;                            // temp accumulation buffer
    int k, kSize;

    // check validity of params
    if(!in || !out || !kernel) return false;
    if(dataSizeX <= 0 || dataSizeY <= 0) return false;
    if(kernelSizeX <= 0 || kernelSizeY <= 0) return false;

    // find center position of kernel (half of kernel size)
    kCenterX = kernelSizeX >> 1;
    kCenterY = kernelSizeY >> 1;
    kSize = kernelSizeX * kernelSizeY;  // total kernel size

    // The border partitions emit kCenterX columns / kCenterY rows at each
    // edge. A smaller image would make them write past the end of out.
    if(dataSizeX < 2 * kCenterX || dataSizeY < 2 * kCenterY) return false;

    // allocate memory for multi-cursor
    inPtr = new unsigned char*[kSize];
    if(!inPtr) return false;            // allocation error

    // set initial position of multi-cursor, NOTE: it is flipped instead of the kernel
    ptr = in + (dataSizeX * kCenterY + kCenterX);   // the first cursor is shifted (kCenterX, kCenterY)
    for(m = 0, t = 0; m < kernelSizeY; ++m)
    {
        for(n = 0; n < kernelSizeX; ++n, ++t)
        {
            inPtr[t] = ptr - n;
        }
        ptr -= dataSizeX;
    }

    // init working pointers
    outPtr = out;

    rowEnd = dataSizeY - kCenterY;      // bottom row partition divider
    colEnd = dataSizeX - kCenterX;      // right column partition divider

    // convolve rows from index=0 to index=kCenterY-1
    y = kCenterY;
    for(i = 0; i < kCenterY; ++i)
    {
        // partition #1 ***********************************
        x = kCenterX;
        for(j = 0; j < kCenterX; ++j)   // column from index=0 to index=kCenterX-1
        {
            sum = 0;
            t = 0;
            for(m = 0; m <= y; ++m)
            {
                for(n = 0; n <= x; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += (kernelSizeX - x - 1);     // jump to next row
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #2 ***********************************
        for(j = kCenterX; j < colEnd; ++j)  // column from index=kCenterX to index=(dataSizeX-kCenterX-1)
        {
            sum = 0;
            t = 0;
            for(m = 0; m <= y; ++m)
            {
                for(n = 0; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #3 ***********************************
        x = 1;
        for(j = colEnd; j < dataSizeX; ++j) // column from index=(dataSizeX-kCenterX) to index=(dataSizeX-1)
        {
            sum = 0;
            t = x;
            for(m = 0; m <= y; ++m)
            {
                for(n = x; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += x;                 // jump to next row
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        ++y;                            // add one more row to convolve for next run
    }

    // convolve rows from index=kCenterY to index=(dataSizeY-kCenterY-1)
    for(i = kCenterY; i < rowEnd; ++i)  // number of rows
    {
        // partition #4 ***********************************
        x = kCenterX;
        for(j = 0; j < kCenterX; ++j)   // column from index=0 to index=kCenterX-1
        {
            sum = 0;
            t = 0;
            for(m = 0; m < kernelSizeY; ++m)
            {
                for(n = 0; n <= x; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += (kernelSizeX - x - 1);     // jump to next row
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #5 ***********************************
        for(j = kCenterX; j < colEnd; ++j)  // column from index=kCenterX to index=(dataSizeX-kCenterX-1)
        {
            sum = 0;
            t = 0;
            for(m = 0; m < kernelSizeY; ++m)
            {
                for(n = 0; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++inPtr[t];         // every cursor is used here, so advance it in place
                    ++t;
                }
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
        }

        // partition #6 ***********************************
        x = 1;
        for(j = colEnd; j < dataSizeX; ++j) // column from index=(dataSizeX-kCenterX) to index=(dataSizeX-1)
        {
            sum = 0;
            t = x;
            for(m = 0; m < kernelSizeY; ++m)
            {
                for(n = x; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += x;                 // jump to next row
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }
    }

    // convolve rows from index=(dataSizeY-kCenterY) to index=(dataSizeY-1)
    y = 1;
    for(i = rowEnd; i < dataSizeY; ++i) // number of rows
    {
        // partition #7 ***********************************
        x = kCenterX;
        for(j = 0; j < kCenterX; ++j)   // column from index=0 to index=kCenterX-1
        {
            sum = 0;
            t = kernelSizeX * y;
            for(m = y; m < kernelSizeY; ++m)
            {
                for(n = 0; n <= x; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += (kernelSizeX - x - 1);     // jump to next row
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #8 ***********************************
        for(j = kCenterX; j < colEnd; ++j)  // column from index=kCenterX to index=(dataSizeX-kCenterX-1)
        {
            sum = 0;
            t = kernelSizeX * y;
            for(m = y; m < kernelSizeY; ++m)
            {
                for(n = 0; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        // partition #9 ***********************************
        x = 1;
        for(j = colEnd; j < dataSizeX; ++j) // column from index=(dataSizeX-kCenterX) to index=(dataSizeX-1)
        {
            sum = 0;
            t = kernelSizeX * y + x;
            for(m = y; m < kernelSizeY; ++m)
            {
                for(n = x; n < kernelSizeX; ++n)
                {
                    sum += *inPtr[t] * kernel[t];
                    ++t;
                }
                t += x;                 // jump to next row
            }

            // store output
            *outPtr = (unsigned char)(fabs(sum * factor) + 0.5f);
            ++outPtr;
            ++x;
            for(k = 0; k < kSize; ++k) ++inPtr[k];  // move all cursors to next
        }

        ++y;                            // the starting row index is increased
    }

    // release the cursor table
    delete [] inPtr;

    return true;
}

// the starting row index is

You might also like