/*Required Libraries*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include<conio.h>
/* Include MNIST Dataset */
#include "mnist.h"
/* Definition of Matrix Struct */
typedef struct{
float** elements;
int rows;
int columns;
} Matrix;
float learningRate = 0.001;
/*---------------Matrix Operators--------------*/
/* Returns Random Values Between -0.5 And +0.5 */
float RAND_FLOAT(){
return ((float) rand() / (float)(RAND_MAX)) - 0.5 ;
}
/* Creates A row x column struct of type of datatype matrix*/
Matrix* createMatrix(int row, int col) {
Matrix *matrix = malloc(sizeof(Matrix));
matrix->rows = row;
matrix->columns = col;
matrix->elements = malloc(row * sizeof(float*));
for (int i = 0; i < row; i++) {
matrix->elements[i] = malloc(col * sizeof(float));
}
return matrix;
}
/* Transposes The Matrix */
Matrix* transposeMatrix(Matrix* m) {
Matrix* mat = createMatrix(m->columns, m->rows);
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
mat->elements[j][i] = m->elements[i][j];
}
}
return mat;
}
/* Print the Matrix */
void printMatrix(Matrix* m) {
printf("Rows: %d Columns: %d\n", m->rows, m->columns);
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
printf("%1.1f ", m->elements[i][j]);
}
printf("\n");
}
}
/* Calls The Rand_Float() Function to Fill The Matrix with values from -0.5 to 0.5*/
void fillMatrix(Matrix *m) {
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
m->elements[i][j] = RAND_FLOAT();
}
}
}
/* Return The Dot Of The Matricies */
Matrix* dot(Matrix *m1, Matrix *m2) {
if (m1->columns == m2->rows) {
Matrix *m = createMatrix(m1->rows, m2->columns);
for (int i = 0; i < m1->rows; i++) {
for (int j = 0; j < m2->columns; j++) {
float sum = 0;
for (int k = 0; k < m2->rows; k++) {
sum += m1->elements[i][k] * m2->elements[k][j];
}
m->elements[i][j] = sum;
}
}
return m;
} else {
printf("Dimension mistmatch dot: %dx%d %dx%d\n", m1->rows, m1->columns, m2->rows, m2->columns);
exit(1);
}
}
/* Returns a Matrix with Elementwise addition of Matricies*/
Matrix* add(Matrix *m1, Matrix *m2) {
if (m1->rows == m2->rows && m1->columns == m2->columns) {
Matrix *m = createMatrix(m1->rows, m1->columns);
for (int i = 0; i < m1->rows; i++) {
for (int j = 0; j < m2->columns; j++) {
m->elements[i][j] = m1->elements[i][j] + m2->elements[i][j];
}
}
return m;
} else {
printf("Dimension mistmatch add: %dx%d %dx%d\n", m1->rows, m1->columns, m2->rows, m2->columns);
exit(1);
}
}
/* Returns Index Of The Maximum Element in a Matrix*/
int argMax(Matrix *m){
/* Expects a column matrix */
float maxElement = 0;
int maxIndex = 0;
for (int i = 0; i < m->rows; i++) {
if (m->elements[i][0] > maxElement) {
maxElement = m->elements[i][0];
maxIndex = i;
}
}
return maxIndex;
}
/* Creates A copy of a Matrix */
Matrix* copyMatrix(Matrix* m) {
Matrix* mat = createMatrix(m->rows, m->columns);
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
mat->elements[i][j] = m->elements[i][j];
}
}
return mat;
}
/* Returns a Matrix With Elementwise Multiplication */
Matrix* multiply(Matrix *m1, Matrix *m2) {
if (m1->rows == m2->rows && m1->columns == m2->columns) {
Matrix *m = createMatrix(m1->rows, m1->columns);
for (int i = 0; i < m1->rows; i++) {
for (int j = 0; j < m2->columns; j++) {
m->elements[i][j] = m1->elements[i][j] * m2->elements[i][j];
}
}
return m;
} else {
printf("Dimension mistmatch multiply: %dx%d %dx%d\n", m1->rows, m1->columns, m2->rows, m2->columns);
exit(1);
}
}
/* Return Sum Of All The Elements In A Matrix */
float sum(Matrix *m){
float ans = 0.00;
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
ans += m->elements[i][j];
}
}
return ans;
}
/* Returns a Matrix with element-wise difference */
Matrix* subtract(Matrix *m1, Matrix *m2) {
if (m1->rows == m2->rows && m1->columns == m2->columns) {
Matrix *m = createMatrix(m1->rows, m1->columns);
for (int i = 0; i < m1->rows; i++) {
for (int j = 0; j < m2->columns; j++) {
m->elements[i][j] = m1->elements[i][j] - m2->elements[i][j];
}
}
return m;
} else {
printf("Dimension mistmatch subtract: %dx%d %dx%d\n", m1->rows, m1->columns, m2->rows, m2->columns);
exit(1);
}
}
/* Returns a Matrix Each Element Multiplied by A number */
Matrix* multiplyScalar(float n, Matrix* m) {
Matrix* mat = copyMatrix(m);
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
mat->elements[i][j] *= n;
}
}
return mat;
}
/* Returns a Matrix Each Element subtracted by A number */
Matrix* subScalar(float n, Matrix* m) {
Matrix* mat = copyMatrix(m);
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
mat->elements[i][j] -= n;
}
}
return mat;
}
/* Frees Memory Allocated To The Matrix */
void freeMatrix(Matrix* matrix) {
for (int i = 0; i < matrix->rows; i++) {
free(matrix->elements[i]);
}
free(matrix->elements);
free(matrix);
}
/*------------ACTIVATION FUNTIONS--------------- */
/* RELU Activation Function Replaces Matrix Elements With 0 if x < 0 and x if x > 0*/
Matrix* RELU(Matrix *m1){
Matrix *m = createMatrix(m1->rows, m1->columns);
for (int i = 0; i < m1->rows; i++) {
for (int j = 0; j < m1->columns; j++) {
if(m1->elements[i][j] > 0)
m->elements[i][j] = m1->elements[i][j];
}
}
return m;
}
/* Coverts Numerical Values into Probability Distribution */
Matrix* Softmax(Matrix *m){
double total = 0;
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
total += exp(m->elements[i][j]);
}
}
Matrix* mat = createMatrix(m->rows, m->columns);
for (int i = 0; i < mat->rows; i++) {
for (int j = 0; j < mat->columns; j++) {
mat->elements[i][j] = exp(m->elements[i][j]) / total;
}
}
return mat;
}
/* Derivative Funtion of RELU Activation Function For Back Propagation*/
Matrix* RELUPrime(Matrix* m) {
Matrix* result = createMatrix(m->rows, m->columns);
for (int i = 0; i < m->rows; i++) {
for (int j = 0; j < m->columns; j++) {
if (m->elements[i][j] > 0) {
result->elements[i][j] = 1;
} else {
result->elements[i][j] = 0;
}
}
}
return result;
}
/*---------------Main --------------*/
void main(){
srand(time(NULL));
load_mnist();
/* Create Both Matricies For Train And Test Matrix */
Matrix* TrainX = createMatrix(784, 600);
Matrix* TestX = createMatrix(784, 100);
/*Transpose Train And Test Image Array and put it into a matrix*/
for (int i = 0; i < 600; i++) {
for (int j = 0; j < 784; j++) {
TrainX->elements[j][i] = train_image[i][j];
}
}
for (int i = 0; i < 100; i++) {
for (int j = 0; j < 784; j++) {
TestX->elements[j][i] = test_image[i][j];
}
}
/* Initialise Weights And Bias Matricies */
Matrix* W1 = createMatrix(784, 784);
fillMatrix(W1);
Matrix* B1 = createMatrix(784, 1);
fillMatrix(B1);
Matrix* W2 = createMatrix(10, 784);
fillMatrix(W2);
Matrix* B2 = createMatrix(10, 1);
fillMatrix(B2);
/* Training */
for(int i = 0; i < 2; i++){
/*Feed Forward */
int label = train_label[i];
Matrix* X = createMatrix(784,1);
Matrix* Y = createMatrix(10, 1);
Y->elements[label][0] = 1; /*One Hot Encoded Actual Label*/
for(int j = 0; j < 784; j++){ X->elements[j][i] = TrainX->elements[j][i]; } /* Flatten Matrix For Each Entry*/
Matrix* dotMat1 = dot(W1,X);
Matrix* Z1 = add(dotMat1,B1);
Matrix* A1 = RELU(Z1);
freeMatrix(dotMat1);
Matrix* dotMat2 = dot(W2,A1);
Matrix* Z2 = add(dotMat2,B2);
Matrix* A2 = Softmax(Z2);
freeMatrix(dotMat2);
freeMatrix(Z2);
/*Back Propagation */
Matrix* dZ2 = subtract(A2, Y);
Matrix* transposeA1 = transposeMatrix(A1);
Matrix* dW2 = dot(dZ2,transposeA1);
freeMatrix(transposeA1);
freeMatrix(A1);
freeMatrix(A2);
Matrix* transposeW2 = transposeMatrix(W2);
Matrix* ReluPrimeZ1 = RELUPrime(Z1);
Matrix* dotMat3 = dot(transposeW2,dZ2);
Matrix* dZ1 = multiply(dotMat3,ReluPrimeZ1);
Matrix* transposeX = transposeMatrix(X);
Matrix* dW1 = dot(dZ1,transposeX);
freeMatrix(Z1);
freeMatrix(transposeW2);
freeMatrix(ReluPrimeZ1);
freeMatrix(dotMat3);
freeMatrix(transposeX);
float dB1 = sum(dZ1)*learningRate;
float dB2 = sum(dZ2)*learningRate;
freeMatrix(dZ1);
freeMatrix(dZ2);
printMatrix(W2);
printf("--%d-- \n \n \n ",i);
/*Update Parameters*/
Matrix* multiplieddW1 = multiplyScalar(learningRate,dW1);
Matrix* multiplieddW2 = multiplyScalar(learningRate,dW2);
free(dW1);
free(dW2);
Matrix* newW1 = subtract(W1,multiplieddW1);
Matrix* newW2 = subtract(W2,multiplieddW2);
freeMatrix(multiplieddW1);
freeMatrix(multiplieddW2);
freeMatrix(W1);
freeMatrix(W2);
Matrix* W1 = copyMatrix(newW1);
Matrix* W2 = copyMatrix(newW2);
freeMatrix(newW1);
freeMatrix(newW2);
Matrix* newB1 = subScalar(dB1,B1);
Matrix* newB2 = subScalar(dB2,B2);
freeMatrix(B1);
freeMatrix(B2);
Matrix* B1 = newB1;
Matrix* B2 = newB2;
freeMatrix(newB1);
freeMatrix(newB2);
freeMatrix(X);
freeMatrix(Y);
}
}
我有一个检查矩阵的点积退出程序,如果尺寸不匹配的要求后,只有一个迭代,它返回一个错误,尺寸不匹配idk值是如何变化的,并在运行程序多次,它是返回随机值的行和列
这只发生在w1和w2矩阵上
我试着打印出循环开始和结束时的尺寸在第一个循环结束时尺寸是正确的但是当循环返回到开始时尺寸被改变了
我还尝试在循环开始时对维度进行硬编码
W1 -> rows = 784;
W1 -> columns = 784;
但这会冻结程序
mnist. h文件是
Takafumi Hoiruchi. 2018.
https://github.com/takafumihoriuchi/MNIST_for_C
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
// set appropriate path for data
#define TRAIN_IMAGE "./data/train-images.idx3-ubyte"
#define TRAIN_LABEL "./data/train-labels.idx1-ubyte"
#define TEST_IMAGE "./data/t10k-images.idx3-ubyte"
#define TEST_LABEL "./data/t10k-labels.idx1-ubyte"
#define SIZE 784 // 28*28
#define NUM_TRAIN 60000
#define NUM_TEST 10000
#define LEN_INFO_IMAGE 4
#define LEN_INFO_LABEL 2
#define MAX_IMAGESIZE 1280
#define MAX_BRIGHTNESS 255
#define MAX_FILENAME 256
#define MAX_NUM_OF_IMAGES 1
unsigned char image[MAX_NUM_OF_IMAGES][MAX_IMAGESIZE][MAX_IMAGESIZE];
int width[MAX_NUM_OF_IMAGES], height[MAX_NUM_OF_IMAGES];
int info_image[LEN_INFO_IMAGE];
int info_label[LEN_INFO_LABEL];
unsigned char train_image_char[NUM_TRAIN][SIZE];
unsigned char test_image_char[NUM_TEST][SIZE];
unsigned char train_label_char[NUM_TRAIN][1];
unsigned char test_label_char[NUM_TEST][1];
double train_image[NUM_TRAIN][SIZE];
double test_image[NUM_TEST][SIZE];
int train_label[NUM_TRAIN];
int test_label[NUM_TEST];
void FlipLong(unsigned char * ptr)
{
register unsigned char val;
// Swap 1st and 4th bytes
val = *(ptr);
*(ptr) = *(ptr+3);
*(ptr+3) = val;
// Swap 2nd and 3rd bytes
ptr += 1;
val = *(ptr);
*(ptr) = *(ptr+1);
*(ptr+1) = val;
}
void read_mnist_char(char *file_path, int num_data, int len_info, int arr_n, unsigned char data_char[][arr_n], int info_arr[])
{
int i, j, k, fd;
unsigned char *ptr;
if ((fd = open(file_path, O_RDONLY)) == -1) {
fprintf(stderr, "couldn't open image file");
exit(-1);
}
read(fd, info_arr, len_info * sizeof(int));
// read-in information about size of data
for (i=0; i<len_info; i++) {
ptr = (unsigned char *)(info_arr + i);
FlipLong(ptr);
ptr = ptr + sizeof(int);
}
// read-in mnist numbers (pixels|labels)
for (i=0; i<num_data; i++) {
read(fd, data_char[i], arr_n * sizeof(unsigned char));
}
close(fd);
}
void image_char2double(int num_data, unsigned char data_image_char[][SIZE], double data_image[][SIZE])
{
int i, j;
for (i=0; i<num_data; i++)
for (j=0; j<SIZE; j++)
data_image[i][j] = (double)data_image_char[i][j] / 255.0;
}
void label_char2int(int num_data, unsigned char data_label_char[][1], int data_label[])
{
int i;
for (i=0; i<num_data; i++)
data_label[i] = (int)data_label_char[i][0];
}
void load_mnist()
{
read_mnist_char(TRAIN_IMAGE, NUM_TRAIN, LEN_INFO_IMAGE, SIZE, train_image_char, info_image);
image_char2double(NUM_TRAIN, train_image_char, train_image);
read_mnist_char(TEST_IMAGE, NUM_TEST, LEN_INFO_IMAGE, SIZE, test_image_char, info_image);
image_char2double(NUM_TEST, test_image_char, test_image);
read_mnist_char(TRAIN_LABEL, NUM_TRAIN, LEN_INFO_LABEL, 1, train_label_char, info_label);
label_char2int(NUM_TRAIN, train_label_char, train_label);
read_mnist_char(TEST_LABEL, NUM_TEST, LEN_INFO_LABEL, 1, test_label_char, info_label);
label_char2int(NUM_TEST, test_label_char, test_label);
}
void print_mnist_pixel(double data_image[][SIZE], int num_data)
{
int i, j;
for (i=0; i<num_data; i++) {
printf("image %d/%d\n", i+1, num_data);
for (j=0; j<SIZE; j++) {
printf("%1.1f ", data_image[i][j]);
if ((j+1) % 28 == 0) putchar('\n');
}
putchar('\n');
}
}
void print_mnist_label(int data_label[], int num_data)
{
int i;
if (num_data == NUM_TRAIN)
for (i=0; i<num_data; i++)
printf("train_label[%d]: %d\n", i, train_label[i]);
else
for (i=0; i<num_data; i++)
printf("test_label[%d]: %d\n", i, test_label[i]);
}
// name: path for saving image (ex: "./images/sample.pgm")
void save_image(int n, char name[])
{
char file_name[MAX_FILENAME];
FILE *fp;
int x, y;
if (name[0] == '\0') {
printf("output file name (*.pgm) : ");
scanf("%s", file_name);
} else strcpy(file_name, name);
if ( (fp=fopen(file_name, "wb"))==NULL ) {
printf("could not open file\n");
exit(1);
}
fputs("P5\n", fp);
fputs("# Created by Image Processing\n", fp);
fprintf(fp, "%d %d\n", width[n], height[n]);
fprintf(fp, "%d\n", MAX_BRIGHTNESS);
for (y=0; y<height[n]; y++)
for (x=0; x<width[n]; x++)
fputc(image[n][x][y], fp);
fclose(fp);
printf("Image was saved successfully\n");
}
// save mnist image (call for each image)
// store train_image[][] into image[][][]
void save_mnist_pgm(double data_image[][SIZE], int index)
{
int n = 0; // id for image (set to 0)
int x, y;
width[n] = 28;
height[n] = 28;
for (y=0; y<height[n]; y++) {
for (x=0; x<width[n]; x++) {
image[n][x][y] = data_image[index][y * width[n] + x] * 255.0;
}
}
save_image(n, "");
}
1条答案
按热度按时间fd3cxomn1#
W1
和W2
将值从“end of the loop in main”更改为“top of the loop in main”的原因如下:在循环的中间,你定义了同名的新变量。这些变量在循环结束时超出了范围,所以一旦你到达顶部,
W1
和W2
引用循环之前定义的W1
和W2
。换句话说,循环顶部的W1
是另一个变量,而不是循环末尾的W1
。您可能希望执行以下操作:
这样就不用定义新变量了。(注意:B1和B2相同)
为了好玩,你可以尝试这样的代码:
这将交替打印42和5,因为顶部的
x
是另一个变量,而不是末尾的x
。顺便说一句:
我还注意到
不应该是
freeMatrix
吗