/*
 * Source code to go with "Introduction to implementing neural networks"
 * Copyright 2015, Arjan van de Ven
 * Provided under the terms of the Apache-2.0 license (see LICENSE file)
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <stdint.h>


#define SYNAPSES 3

#define INPUTS  2
#define OUTPUTS 1
#define HIDDEN  2
#define BIAS    1
#define NETSIZE (BIAS + INPUTS + HIDDEN + OUTPUTS)

/* convenince constants */
#define HIDDEN_START (BIAS + INPUTS)
#define HIDDEN_END   (BIAS + INPUTS + HIDDEN)
#define OUTPUT_START (BIAS + INPUTS + HIDDEN)
#define OUTPUT_END   (BIAS + INPUTS + HIDDEN + OUTPUTS)


#define MAXINPUT 4


/* The training set definition, a set of inputs with expected outputs */
struct input {
  double values[INPUTS];
  int output[OUTPUTS];
};

struct input inputs[MAXINPUT];


/* The basic neuron */

struct neuron {
  double 	value;             /* the (output) value of the neuron */

  int 		inputs[SYNAPSES];  /* which neurons are the input */
  double 	weights[SYNAPSES]; /* and their weights */
};

/* 
 * A network is a set of neurons
 *
 * For convenience this is represented as a flat array, where
 * the neurons are, in order,
 *
 * count	type
 * =========================================================================
 * BIAS		The bias neuron(s) for getting fixed values into the network
 * INPUTS	The inputs to the network
 * HIDDEN	The hidden layer neurons
 * OUTPUTS	The outputs of the network
 */

struct network {
  struct neuron neurons[NETSIZE];  
};


static double learning_rate = 0.5;


/*
 * Filling in the training set. 
 * For simple networks one can hardcode the inputs like this,
 * but for more complex problems it's usually more convenient
 * to load these from a file on disk.
 */
static void create_training_set(void)
{
	inputs[0].values[0] = 0;
	inputs[0].values[1] = 0;
	inputs[0].output[0] = 0;

	inputs[1].values[0] = 1;
	inputs[1].values[1] = 0;
	inputs[1].output[0] = 1;

	inputs[2].values[0] = 0;
	inputs[2].values[1] = 1;
	inputs[2].output[0] = 1;

	inputs[3].values[0] = 1;
	inputs[3].values[1] = 1;
	inputs[3].output[0] = 0;

}


/* 
 * Helper function 
 * Return a random number between -1.0 and 1.0 
 */
static double urand(void) 
{
	double d;
	int64_t j;

	j = rand();

	d = j / (1.0 * RAND_MAX);

	if (rand() & 1)
		d = - d;

	return 1.0 * d;
}

/*
 * The "activation function" of a neuron
 */

static double activation(double v)
{
	return 1.0 / ( 1 + exp(-v));
}


static void calculate_neuron(struct network *network, int index)
{
	int i;
	double input_sum;
	struct neuron *neuron;
	
	neuron = &network->neurons[index];

	/* add up the inputs to the neuron, multiplied by their weight */
	input_sum = 0.0;
	for (i = 0; i < SYNAPSES; i++) {
		input_sum += neuron->weights[i] * network->neurons[ neuron->inputs[i] ].value;
	}

	/* and transform it through the activation function */

	network->neurons[index].value = activation(input_sum);
}

static void calculate_network(struct network *network)
{
	int i;

	for (i = INPUTS + 1; i < NETSIZE; i++)
		calculate_neuron(network, i);
}

static void update_weight(struct neuron *neuron, int index, double delta)
{
	neuron->weights[index] += delta;
}

static double compute_one_input(struct network *network, struct input *input)
{
	int i;
	double error_sum = 0.0;

	/* set the BIAS neuron to 1 */
	network->neurons[0].value = 1.0;

	/* set the input neurons to the values from the training data */
	for (i = 0; i < INPUTS; i++)
		network->neurons[BIAS+i].value = input->values[i];

	calculate_network(network);

	/* now calculate the error for all outputs */
	for (i = 0; i < OUTPUTS; i++) {
		double error;
		double gradient;
		struct neuron *neuron;
		int h;

		neuron = &network->neurons[OUTPUT_START + i];

		error = input->output[i] - neuron->value;

		error_sum += error * error;

		/* Back propagation: the error propagates back via the inputs of the output neuron */
		gradient = neuron->value * (1.0 - neuron->value) * error;

		for (h = 0; h < BIAS + HIDDEN; h++) {
			double delta;
			double hidden_gradient;
			struct neuron *hidden_neuron;
			int inp;

			hidden_neuron = &network->neurons[ neuron->inputs[h] ];

			/* calculate the delta */			
			delta = learning_rate * gradient * hidden_neuron->value;

			/* calcuate the (recursive) gradient */	
			hidden_gradient = hidden_neuron->value * (1 - hidden_neuron->value) * gradient * neuron->weights[h];

			/* adjust the weight */			
			update_weight(neuron, h, delta);


			/* further propagate the error to the input layer */
			for (inp = 0; inp < BIAS + INPUTS; inp++) {
				struct neuron *input_neuron;

				input_neuron = &network->neurons[ hidden_neuron->inputs[inp] ];

				delta = learning_rate * input_neuron->value * hidden_gradient;
				update_weight(hidden_neuron, inp, delta);
			}

			
		}		
		
	}

	return error_sum;
}

static double compute_for_all_inputs(struct network *network)
{
	int i;
	double error_sum = 0.0;

	for (i = 0; i < MAXINPUT; i++)
		error_sum += compute_one_input(network, &inputs[i]);

	return error_sum;
}


static struct network *allocate_network(void)
{
	struct network *n;
	int i,k;

	n = calloc(sizeof(struct network), 1);

	/* create the network structure */
	/* the hidden layer neurons connect from all input and bias neurons, with random weight */
	for (i = HIDDEN_START; i < HIDDEN_END; i++) {
		for (k = 0; k < BIAS + INPUTS; k++) {
			n->neurons[i].inputs[k] = k;
			n->neurons[i].weights[k] = urand();
		}
	}

	/* the output neurons connect from all hidden neurons, with random weight */
	for (i = OUTPUT_START; i < OUTPUT_END; i++) {
		/* set up the link to the bias neuron */
		n->neurons[i].inputs[0] = 0; 
		n->neurons[i].weights[0] = urand(); 
		/* and then connect up each of the neurons in the hidden layer */
		for (k = 0; k < HIDDEN; k++) {
			n->neurons[i].inputs[k + BIAS] = HIDDEN_START + k;
			n->neurons[i].weights[k + BIAS] = urand();
		}
	}

	return n;
}

static void free_network(struct network *n)
{
	free(n);
}


/* 
 * Output the structure of the network to a ".dot" file that
 * can be turned into a graphic with the dot program from
 * the "graphviz" project:
 *
 * dot filename.dot -O -Tpng
 */
static void output_network(char *filename, struct network *n)
{
	FILE *file;
	int i, j;
	
	file = fopen(filename, "w");
	if (!file)
		return;
	fprintf(file, "digraph G {\n");
	fprintf(file, "\t { \n");
	fprintf(file, "\t\t rank=source;\n");
	for (i = 0; i < INPUTS + BIAS; i++) {
		fprintf(file,"\t\t%i;\n", i);
	}
	fprintf(file, "\t}\n");
	fprintf(file, "\t { \n");
	fprintf(file, "\t\t rank=sink;\n");
	for (i = OUTPUT_START; i < OUTPUT_END; i++) {
		fprintf(file,"\t\t%i;\n", i);
	}
	fprintf(file, "\t}\n");
	for (i = INPUTS + BIAS; i < NETSIZE; i++)  {
		for (j = 0; j < SYNAPSES; j++) {
			if (fabs(n->neurons[i].weights[j])>0.001 ) {
				fprintf(file, " %i -> %i [label = %4.1f] ;\n", n->neurons[i].inputs[j], i, n->neurons[i].weights[j]);
			}
		}
	}
	fprintf(file, "\tsubgraph inputcluster { \n");
	for (i = 0; i < INPUTS + BIAS; i++) {
		fprintf(file,"\t\t%i;\n", i);
	}
	fprintf(file, "\t}\n");
	fprintf(file, "\tsubgraph outputcluster { \n");
	for (i = NETSIZE-OUTPUTS; i < NETSIZE; i++) {
		fprintf(file,"\t\t%i;\n", i);
	}
	fprintf(file, "\t}\n");
	fprintf(file, "}\n");
	fclose(file);
}


int main(int argc, char **argv)
{
	struct network *net;
	double error;
	int i;


	/* initialize the random number generator */
	srand(time(NULL) + argc + strlen(argv[0]));

	create_training_set();

	net = allocate_network();

	error = compute_for_all_inputs(net);
	printf("Error is %5.4f\n", error);


	for (i = 0; i <= 5000; i++) {
		error = compute_for_all_inputs(net);

		/* every 10 rounds, print a status update */
		if (i % 10 == 0)
			printf("Round %4i: Error is %5.4f\n", i, error);

	}
	
	output_network("final.dot", net);

	free_network(net);

	return EXIT_SUCCESS;
}