This is a short implementation of a Convolutional Neural Network layer (2-D convolution) in the C language.
The tensor type is a simple struct that contains the dimensions of the array (its shape) and the tensor data (a pointer to a 1-dim array).
header file
// conv2d.h
// Public interface of the naive 2-D convolution.
// The guard macro avoids leading/double underscores, which are reserved for
// the C implementation.
#ifndef CONV2D_H
#define CONV2D_H
#include "meta_header.h" // presumably declares the `tensor` type -- confirm

// Direct 2-D convolution over tensors stored as flat arrays.
//   input : shape [batch_size, height, width, channels]
//   kernel: shape [filter_height, filter_width, input_channels, output_channels]
//   stride: [1, H, W, 1]; only stride[1] (H) and stride[2] (W) are read
//   output: written in place; its shape[1] and shape[2] must equal
//           (height - filter_height) / stride[1] + 1 and
//           (width  - filter_width)  / stride[2] + 1 respectively
//   bias  : one value per output channel, used as the initial sum
// On a shape mismatch a message is printed and the function returns without
// writing the output.
void naive_conv2d(tensor *input, tensor *output, tensor *kernel, int *stride, float *bias);
#endif // CONV2D_H
Implementation File
#include <stdio.h>
#include "conv2d.h"
// Flat offset of element [i0][i1][i2][i3] in a row-major 4-D array whose three
// innermost dimensions are n1, n2 and n3. Computed in size_t so that large
// tensors do not overflow int arithmetic.
static size_t conv2d_flat_index(int i0, int i1, int i2, int i3, int n1, int n2, int n3)
{
    return (size_t)i3
         + (size_t)n3 * ((size_t)i2
         + (size_t)n2 * ((size_t)i1
         + (size_t)n1 * (size_t)i0));
}

// Naive direct 2-D convolution (no padding, no dilation).
//
// Input shape:  [batch_size, height, width, channels]
// Kernel shape: [filter_height, filter_width, input_channels (n_channels), output_channels (n_filters)]
// Stride shape: [1, H, W, 1] (only stride[1] and stride[2] are read)
// Output shape: [batch_size, out_height, out_width, output_channels], where
//   out_height = (height - filter_height) / stride[1] + 1
//   out_width  = (width  - filter_width)  / stride[2] + 1
// bias: one value per output channel; a NULL bias is treated as all zeros.
//
// Every tensor is a flat array read as a 4-D row-major array, i.e. element
// [a][b][c][d] of shape [A][B][C][D] lives at offset d + D*(c + C*(b + B*a)).
//
// If any argument is inconsistent (NULL pointer, non-positive stride, kernel
// larger than the input, or mismatching shapes) a message is printed and the
// output is left untouched.
//
// Reference:
// https://sahnimanas.github.io/post/anatomy-of-a-high-performance-convolution/
void naive_conv2d(tensor *input, tensor *output, tensor *kernel, int *stride, float *bias)
{
    if (input == NULL || output == NULL || kernel == NULL || stride == NULL) {
        printf("naive_conv2d: NULL argument\n");
        return;
    }

    int batch_size = input->shape[0];
    int in_height = input->shape[1];
    int in_width = input->shape[2];
    int in_channels = input->shape[3];
    if (in_channels != kernel->shape[2]) {
        printf("kernel in_channels and input_channels doesn't match\n");
        return;
    }

    int kernel_height = kernel->shape[0];
    int kernel_width = kernel->shape[1];
    int out_channels = kernel->shape[3];
    int out_height = output->shape[1];
    int out_width = output->shape[2];
    int stride_h = stride[1];
    int stride_w = stride[2];

    // Checked before the divisions below: a zero stride would divide by zero.
    if (stride_h <= 0 || stride_w <= 0) {
        printf("the stride must be positive\n");
        return;
    }
    // Integer division truncates toward zero, so a kernel larger than the
    // input could otherwise slip through the size checks and read out of bounds.
    if (kernel_height < 1 || kernel_width < 1
        || kernel_height > in_height || kernel_width > in_width) {
        printf("the kernel does not fit inside the input\n");
        return;
    }
    if (out_height != (in_height - kernel_height) / stride_h + 1) {
        printf("the ouput_height is incorrect\n");
        return;
    }
    if (out_width != (in_width - kernel_width) / stride_w + 1) {
        printf("the ouput_width is incorrect\n");
        return;
    }
    // The output offsets below assume these two dimensions as well.
    if (output->shape[0] != batch_size || output->shape[3] != out_channels) {
        printf("the output batch_size or out_channels is incorrect\n");
        return;
    }

    for (int batch = 0; batch < batch_size; batch++) {
        for (int o_c = 0; o_c < out_channels; o_c++) {
            for (int o_h = 0; o_h < out_height; o_h++) {
                for (int o_w = 0; o_w < out_width; o_w++) {
                    float val = (bias != NULL) ? bias[o_c] : 0.0f;
                    for (int i_c = 0; i_c < in_channels; i_c++) {
                        for (int k_h = 0; k_h < kernel_height; k_h++) {
                            for (int k_w = 0; k_w < kernel_width; k_w++) {
                                // Top-left corner of the window plus the kernel tap.
                                int i_h = o_h * stride_h + k_h;
                                int i_w = o_w * stride_w + k_w;
                                // kernel->data[k_h][k_w][i_c][o_c]
                                size_t k_idx = conv2d_flat_index(k_h, k_w, i_c, o_c,
                                                                 kernel_width, in_channels, out_channels);
                                // input->data[batch][i_h][i_w][i_c]
                                // The input must be indexed with its OWN height and
                                // width, not the output's.
                                size_t i_idx = conv2d_flat_index(batch, i_h, i_w, i_c,
                                                                 in_height, in_width, in_channels);
                                val += kernel->data[k_idx] * input->data[i_idx];
                            }
                        }
                    }
                    // output->data[batch][o_h][o_w][o_c]
                    output->data[conv2d_flat_index(batch, o_h, o_w, o_c,
                                                   out_height, out_width, out_channels)] = val;
                }
            }
        }
    }
}
My tensor data is saved as a 1-dim array, so the key here is to read that array as an $n$-dim array.
This is called indexing of multidimensional arrays, and it is implemented in the nested for-loop above. I would like to write a thorough post on indexing of multidimensional arrays sometime.
Till then Ciao!