/* sandpile3d-256sink.c : Parallel version using MPI
 * Simulates independent stationary configurations
 * of the 3D Abelian sandpile model with Dirichlet boundary conditions
 * and records which sites topple when a particle is added at the origin 0.
 * Specialized to L = 256, giving a box of size 2L x 2L x 2L.
 * The sink consists of all the boundary sites of the box.
 * by Antal A. Jarai and Minwei Sun
 */
/* To compile:  mpicc -o sandpile3d-256sink sandpile3d-256sink.c mt19937ar-aj.c jump_mt19937-aj.c
 * To run:  mpirun ./sandpile3d-256sink <jump> <number of samples>
 *   (the master process reads the jump-ahead coefficients from the file
 *    clist_mt19937.txt in the current working directory)
 * mt19937ar.c is the source code of the Mersenne Twister from the author's website:
 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html
 */

#include "mt19937ar-aj.h"   /* Header file for Random-Number Generator */
#include <stdio.h>          /* header files formats formula we used */
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mpi.h"
#define MASTER 0         /* Rank of the master process: it hands out sample requests to the workers and collects their replies */
#define SAMPLE_REQUEST 3 /* MPI message tag: master -> worker (payload 1 = compute a sample, 0 = stop) */
#define SAMPLE_REPLY 4   /* MPI message tag: worker -> master, sent after a sample has been computed */
#define L 256          /* Linear radius of box for simulation; also the coordinate value that marks the sink planes */
#define ROWSIZE 512 /* 2*L: number of sites per row, i.e. index stride of the middle coordinate */
#define LAYERSIZE 262144 /* 2*L * 2*L: number of sites per layer, i.e. index stride of the high coordinate */
#define ARRAYSIZE 134217728 /* 2*L * 2*L * 2*L: total number of lattice sites */
#define BUFFERSIZE ARRAYSIZE /* Capacity of the toppling stack (checked in push) */

MPI_Comm world;       /* Communicator used for all point-to-point traffic; set to MPI_COMM_WORLD in main */
int sample_request;   /* Payload of SAMPLE_REQUEST / SAMPLE_REPLY messages: 1 = compute a sample, 0 = terminate */

/* Neighbour macros for the packed site index
 *
 *     index = low + ROWSIZE * mid + LAYERSIZE * high,
 *
 * where each of the three coordinates occupies 9 bits (values 0 .. 2L-1).
 * Every macro moves one step along a single coordinate and wraps around
 * at the ends of the 9-bit range, so the result is always a valid index:
 *
 *     north / south : mid  coordinate - 1 / + 1
 *     west  / east  : high coordinate - 1 / + 1
 *     left  / right : low  coordinate - 1 / + 1
 *
 * The argument is fully parenthesized, so arbitrary expressions may be
 * passed, but it is evaluated more than once: do not pass expressions
 * with side effects.  The masks are spelled in hexadecimal because the
 * 0b... binary literal is not part of ISO C before C23.
 */
#define SP_MASK_LOW   0x00001ff   /* bits  0 ..  8: low coordinate  */
#define SP_MASK_MID   0x003fe00   /* bits  9 .. 17: mid coordinate  */
#define SP_MASK_HIGH  0x7fc0000   /* bits 18 .. 26: high coordinate */

#define north(pos) (((pos) & SP_MASK_MID) ? ((pos) - ROWSIZE) : ((pos) | SP_MASK_MID))
#define south(pos) ((((pos) & SP_MASK_MID) ^ SP_MASK_MID) ? ((pos) + ROWSIZE) : ((pos) & (SP_MASK_HIGH | SP_MASK_LOW)))
#define west(pos)  (((pos) & SP_MASK_HIGH) ? ((pos) - LAYERSIZE) : ((pos) | SP_MASK_HIGH))
#define east(pos)  ((((pos) & SP_MASK_HIGH) ^ SP_MASK_HIGH) ? ((pos) + LAYERSIZE) : ((pos) & (SP_MASK_MID | SP_MASK_LOW)))
#define left(pos)  (((pos) & SP_MASK_LOW) ? ((pos) - 1) : ((pos) | SP_MASK_LOW))
#define right(pos) ((((pos) & SP_MASK_LOW) ^ SP_MASK_LOW) ? ((pos) + 1) : ((pos) & (SP_MASK_HIGH | SP_MASK_MID)))

/* Per-site working arrays, ARRAYSIZE entries each, indexed by the packed site index.
 * To save memory the sandpile phase reuses the storage of the tree phase
 * through the three aliases defined below, so the tree data is destroyed
 * once sandconf() and avalanche1() have run. */
/* unsigned char sand[ARRAYSIZE]; */      /* sand configuration: no array of its own, stored in intree (see alias below) */
unsigned long int treepos[ARRAYSIZE];   /* Site reached by the most recent random-walk step taken from each site (written in run).
                                         * TODO (original note): replace treepos by treeneighbour and compute the
                                         * position with the neighbour macros from the stored direction. */
unsigned char treeneighbour[ARRAYSIZE]; /* Direction code of that step: 0 west, 1 east, 2 south, 3 north, 4 right, 5 left */
unsigned char intree[ARRAYSIZE];        /* Non-zero once the site has been added to the tree */
unsigned long int distance[ARRAYSIZE];  /* Number of tree steps from the site to the sink (0 on sink sites) */

#define sand intree            /* Heights written by sandconf; sink sites hold (unsigned char) -1 */
#define topple treeneighbour   /* Non-zero if the site toppled during the avalanche (avalanche1) */
#define stack distance         /* Storage of the toppling stack used by push/pop */

long int stack_pointer;  /* Stack pointer for topplings: number of entries currently on the stack */
unsigned long int box[61][61][61];         /* Per process: number of samples in which the site at offset (x, y, z), each in -30..30, toppled; stored at [x+30][y+30][z+30] */
unsigned long int total_box[61][61][61];   /* Sum of box over the worker communicator (result of MPI_Reduce on the master) */
unsigned long int vertices[L];       /* how many times each vertex toppled: number of samples in which the site with index x (0 <= x < L) toppled */
unsigned long int total_vertices[L];   /* all avalanches added together: sum of vertices over the worker communicator */
float portion[L];                      /* total_vertices[x] divided by the number of samples */
unsigned long int prob[6] = { 0, 0, 0, 0, 0, 0};         /* Per process: histogram of the height at the origin (0..5) before the particle is added */
unsigned long int total_prob[6] = { 0, 0, 0, 0, 0 , 0};  /* Sum of prob over the worker communicator */
float probability[6];                  /* total_prob[h] divided by the number of samples */
unsigned long int seed;   /* Seed for random number generator (not referenced in the code visible in this file) */
unsigned long int nb[6];  /* The six neighbours of the origin, in the order west, east, south, north, right, left */
FILE *box_file;           /* Output file box3d-256sink.txt */
FILE *portion_file;       /* Output file portion3d-256sink.txt */
FILE *probability_file;   /* Output file probability3d-256sink.txt */
unsigned long *pf;        /* Jump-ahead coefficient buffer: allocated in main, filled with set_coef on the master and broadcast */
State s0;                 /* NOTE(review): State comes from mt19937ar-aj.h; s0 is not referenced in the code visible here */

/* Resets this process's tallies (vertices, prob, box), caches the six
 * neighbours of the origin in nb[] and empties the toppling stack. */
void initialize()
{
    /* Clear the toppling counts, the origin-height histogram and the
     * 61 x 61 x 61 box of toppling counts. */
    memset(vertices, 0, sizeof vertices);
    memset(prob, 0, sizeof prob);
    memset(box, 0, sizeof box);

    /* Neighbours of the origin; the index order matches the direction
     * codes stored in treeneighbour (0 west ... 5 left). */
    nb[0] = west(0);
    nb[1] = east(0);
    nb[2] = south(0);
    nb[3] = north(0);
    nb[4] = right(0);
    nb[5] = left(0);

    stack_pointer = 0;
}

/* Runs a random walk from startpos until it reaches a site that is already
 * in the tree, then attaches the walk's final (loop-erased) path to the tree.
 * Assumes that at least the root (sink) is marked as in the tree.
 *
 * While walking, every visited site records the last step taken from it
 * (treepos / treeneighbour).  A later visit overwrites the earlier record,
 * so following treepos from startpos afterwards traces the path with all
 * loops removed.  That path is then marked as in the tree and each of its
 * sites receives its distance to the sink. */
void run(unsigned long int startpos)
{
    unsigned long int site;
    unsigned long int next;
    unsigned long int hops;
    unsigned long int d;
    unsigned char draw;
    unsigned char dir;

    /* Phase 1: walk until the tree is hit. */
    site = startpos;
    while (!intree[site])
    {
        /* Top three bits of a 32-bit random number give 0..7; the values
         * 0 and 7 are rejected, leaving six equally likely outcomes. */
        do
        {
            draw = (genrand_int32() >> 29);
        } while (draw == 0 || draw == 7);

        switch (draw)
        {
        case 1:
            next = north(site);
            dir = 3;
            break;
        case 3:
            next = west(site);
            dir = 0;
            break;
        case 5:
            next = left(site);
            dir = 5;
            break;
        case 2:
            next = south(site);
            dir = 2;
            break;
        case 4:
            next = east(site);
            dir = 1;
            break;
        default: /* draw == 6 */
            next = right(site);
            dir = 4;
            break;
        }
        treeneighbour[site] = dir;
        treepos[site] = next;
        site = next;
    }

    /* Phase 2: count the steps of the loop-erased path. */
    hops = 0;
    for (site = startpos; !intree[site]; site = treepos[site])
        hops++;

    /* The walk ended at 'site'; the start is 'hops' steps further away
     * from the sink than that site. */
    d = distance[site] + hops;

    /* Phase 3: add the path to the tree, distances decreasing by one
     * per step towards the hit point. */
    for (site = startpos; !intree[site]; site = next)
    {
        next = treepos[site];
        intree[site] = 1;
        distance[site] = d;
        d--;
    }
}

/* Builds a complete tree over all sites, together with the distance of every
 * site to the sink: clears the tree, plants the sink (the three planes on
 * which one coordinate equals L) at distance 0, then starts a walk from
 * every site in index order. */
void tree()
{
    unsigned long int a, b, k;
    unsigned long int start;

    /* No vertex is in the tree yet. */
    memset(intree, 0, sizeof intree);

    for (a = 0; a < ROWSIZE; a++)
        for (b = 0; b < ROWSIZE; b++)
        {
            /* One site on each of the three sink planes. */
            const unsigned long int sinks[3] = {
                LAYERSIZE*a + ROWSIZE*b + L,
                LAYERSIZE*a + ROWSIZE*L + b,
                LAYERSIZE*L + ROWSIZE*a + b
            };

            for (k = 0; k < 3; k++)
            {
                intree[sinks[k]] = 1;    /* the sink belongs to the tree ... */
                distance[sinks[k]] = 0;  /* ... at distance 0 from the root */
            }
        }

    /* Sink sites need no special treatment here: run() returns at once
     * for any site that is already in the tree. */
    for (start = 0; start < ARRAYSIZE; start++)
        run(start);
}

/* Converts the tree (distance / treeneighbour) into a sandpile configuration.
 *
 * For every site with non-zero distance the height starts at 5 and is
 * lowered by one for each neighbour that is strictly closer to the sink
 * than the site's parent level (distance - 1).  Neighbours exactly at the
 * parent level are scanned in the order 5, 4, ..., 0; each one met before
 * the actual tree neighbour lowers the height by one more.
 * The result is written to sand[], which shares storage with intree[].
 * Finally every sink site is set to (unsigned char) -1. */
void sandconf()
{
    unsigned long int site;
    unsigned long int nbdist[6];
    unsigned long int parentlevel;
    unsigned char h;
    unsigned char seen_parent;
    int k, a, b;

    for (site = 0; site < ARRAYSIZE; site++)
    {
        /* Distance 0 marks a sink site; those are filled in below. */
        if (distance[site] == 0)
            continue;

        parentlevel = distance[site] - 1;

        /* Same order as the direction codes in treeneighbour. */
        nbdist[0] = distance[west(site)];
        nbdist[1] = distance[east(site)];
        nbdist[2] = distance[south(site)];
        nbdist[3] = distance[north(site)];
        nbdist[4] = distance[right(site)];
        nbdist[5] = distance[left(site)];

        h = 5;
        seen_parent = 0;
        for (k = 5; k >= 0; k--)
        {
            if (nbdist[k] < parentlevel)
            {
                h--;
            }
            else if (nbdist[k] == parentlevel)
            {
                if (treeneighbour[site] == k)
                    seen_parent = 1;
                if (!seen_parent)
                    h--;
            }
        }
        sand[site] = h;
    }

    /* Mark the three sink planes. */
    for (a = 0; a < ROWSIZE; a++)
        for (b = 0; b < ROWSIZE; b++)
        {
            sand[LAYERSIZE*a + ROWSIZE*b + L] = -1;
            sand[LAYERSIZE*a + ROWSIZE*L + b] = -1;
            sand[LAYERSIZE*L + ROWSIZE*a + b] = -1;
        }
}


/* Pushes a site index onto the toppling stack.
 *
 * The stack holds at most BUFFERSIZE entries.  Overflow is treated as a
 * fatal error, exactly like underflow in pop(): dropping the site instead
 * would leave an unstable site untoppled and silently corrupt the
 * avalanche that is being computed. */
void push(unsigned long int site){
    if (stack_pointer >= BUFFERSIZE)
    {
        printf("Error: stack overflow\n");
        exit(-1);
    }
    stack[stack_pointer++] = site;
}

/* Removes the most recently pushed site index from the toppling stack and
 * returns it.  Popping from an empty stack prints an error message and
 * terminates the process with exit(-1). */
unsigned long int pop(void)
{
    if (stack_pointer <= 0)
    {
        printf("Error: cannot pop from empty stack\n");
        exit(-1);
    }
    stack_pointer--;
    return stack[stack_pointer];
}

/* Compute avalanche using waves */
void avalanche1()
{
  register unsigned long int pos, sinkpos, temp;
    register unsigned long int i, j;
    unsigned long int n;
    unsigned long int posnb[6];
    
    for (i = 0; i < ARRAYSIZE; i++)
        topple[i] = 0;
    sand[0]++;
    if (sand[0] == 6)
    {
        topple[0] = 1;
	sand[0]--;
        while (sand[0] == 5)
        {
            sand[0] -= 6;
            for (n = 0; n < 6; n++)
            {
                if (++sand[nb[n]] == 6)
                    push(nb[n]);
            }
            while (stack_pointer != 0)
            {
	      pos = pop();
	      topple[pos] = 1;
	      sand[pos] -= 6;
	      if (sand[temp = west(pos)] == 5)
		{
		  push(temp);
		}
	      if ((temp & (0x1ff << 18)) ^ (L << 18))
		sand[temp]++;
	      if (sand[temp = east(pos)] == 5)
		{
		  push(temp);
		}
	      if ((temp & (0x1ff << 18)) ^ (L << 18))
		sand[temp]++;
	      if (sand[temp = south(pos)] == 5)
		{
		  push(temp);
		}
	      if ((temp & (0x1ff << 9)) ^ (L << 9))
		sand[temp]++;
	      if (sand[temp = north(pos)] == 5)
		{
		  push(temp);
		}
	      if ((temp & (0x1ff << 9)) ^ (L << 9))
		sand[temp]++;
	      if (sand[temp = right(pos)] == 5)
		{
		  push(temp);
		}
	      if ((temp & 0x1ff) ^ L)
		sand[temp]++;
	      if (sand[temp = left(pos)] == 5)
		{
		  push(temp);
		}
	      if ((temp & 0x1ff) ^ L)
		sand[temp]++;   
            }
        }
	sand[0]++;
    }
}

/* Carries out n independent avalanches.
 *
 * Each round builds a fresh tree, turns it into a sandpile configuration,
 * records the height at the origin in prob[], adds a particle at the origin
 * and then tallies which sites toppled:
 *   - vertices[x] for the sites with index x = 0 .. L-1,
 *   - box[x+30][y+30][z+30] for the sites at offset (x, y, z) from the
 *     origin with each offset in -30 .. 30 (negative offsets wrap around
 *     through the 9-bit coordinate mask). */
void vertex_indept(unsigned long int n)
{
    unsigned long int round;
    long int x, y, z;
    long int site;

    for (round = 0; round < n; round++)
    {
        tree();
        sandconf();
        prob[sand[0]]++;        /* height at the origin before the particle is added */
        avalanche1();

        for (x = 0; x < L; x++)
        {
            if (topple[x])
                vertices[x]++;
        }

        for (x = -30; x <= 30; x++)
        {
            for (y = -30; y <= 30; y++)
            {
                for (z = -30; z <= 30; z++)
                {
                    site = (x & 0x1ff) + (y & 0x1ff)*ROWSIZE + (z & 0x1ff)*LAYERSIZE;
                    if (topple[site])
                        box[x+30][y+30][z+30]++;
                }
            }
        }
    }
}

/* Main code - Read required number of samples from command line,
 * run the simulation and write results to file.
 *
 * Command line:  [<jump>] <number of samples>
 *   The last argument is the number of samples.  The argument before it,
 *   if present, is the jump offset of the random number streams; it
 *   defaults to 0.
 *
 * Rank MASTER distributes the samples one at a time: every worker rank
 * 1 .. min(numprocs-1, samples) receives a SAMPLE_REQUEST, computes one
 * sample with vertex_indept(1) and answers with a SAMPLE_REPLY, until the
 * master sends a request with payload 0.  The tallies are then summed onto
 * the master with MPI_Reduce and written to portion3d-256sink.txt,
 * probability3d-256sink.txt and box3d-256sink.txt.
 *
 * Fatal set-up errors (bad arguments, no worker ranks, missing coefficient
 * file, allocation failure) end the whole job through MPI_Abort, so that no
 * rank is left waiting in a collective call.
 *
 * Returns 0 on normal completion. */
int main(int argc, char *argv[])
{
    int color;
    int numprocs, myid, j;
    int jump = 0;
    MPI_Comm workers;
    MPI_Status status;
    unsigned long int n = 0, num_batch = 0;
    unsigned long int batch_remain_sent = 0, batch_remain_received = 0;
    unsigned long int i;
    int x, y, z;
    char *tail;

    MPI_Init(&argc, &argv);
    world = MPI_COMM_WORLD;
    MPI_Comm_size(world, &numprocs);
    MPI_Comm_rank(world, &myid);
    /* Master node reads number of samples (and optional jump) from command line */
    if (myid == MASTER)
    {
        printf("Number of arguments: %d \n", argc);
        if (argc < 2)
        {
            fprintf(stderr, "Usage: %s [<jump>] <number of samples>\n",
                    (argc > 0) ? argv[0] : "sandpile3d-256sink");
            MPI_Abort(world, 1);
            return 1;
        }
        n = strtoul(argv[argc-1], &tail, 10);
        /* strtoul silently accepts a leading '-' and wraps around, so reject it explicitly */
        if (tail == argv[argc-1] || *tail != '\0' || argv[argc-1][0] == '-')
        {
            fprintf(stderr, "Invalid number of samples: %s\n", argv[argc-1]);
            MPI_Abort(world, 1);
            return 1;
        }
        num_batch = n;
        batch_remain_sent = num_batch;
        batch_remain_received = num_batch;
        printf("Number of samples requested: %lu\n", n);
        if (argc >= 3)
        {
            long int requested = strtol(argv[argc-2], &tail, 10);

            if (tail == argv[argc-2] || *tail != '\0')
            {
                fprintf(stderr, "Invalid jump: %s\n", argv[argc-2]);
                MPI_Abort(world, 1);
                return 1;
            }
            jump = (int) requested;
        }
        printf("Jump requested: %d\n", jump);
        /* The master never computes samples itself; without a worker rank
         * it would wait forever for a reply. */
        if (n > 0 && numprocs < 2)
        {
            fprintf(stderr, "At least 2 MPI processes are needed (1 master + workers)\n");
            MPI_Abort(world, 1);
            return 1;
        }
    }
    /* Master node broadcasts number of batches required and the jump */
    MPI_Bcast(&num_batch, 1, MPI_UNSIGNED_LONG, MASTER, world);
    MPI_Bcast(&jump, 1, MPI_INT, MASTER, world);
    /* color 0: master and the ranks that will receive work; color 1: surplus ranks */
    color = ((unsigned long int) myid > num_batch);
    MPI_Comm_split(world, color, 0, &workers);

    /* Initialize random numbers */
    init_genrand(1); /* Set seed = 1 */
    pf = (unsigned long *)calloc(P_SIZE, sizeof(unsigned long));
    if (pf == NULL)
    {
        fprintf(stderr, "Out of memory allocating jump-ahead coefficients\n");
        MPI_Abort(world, 1);
        return 1;
    }
    /* Initialize jump ahead */
    if (myid == MASTER)
    {
        FILE *fin;
        int c;   /* int, so that EOF is distinguishable from every character */

        /* read the file clist_mt19937.txt, and set the coefficients */
        if ((fin = fopen("clist_mt19937.txt", "r")) == NULL){
            fprintf(stderr, "File read error.\n");
            /* The other ranks are about to enter MPI_Bcast: abort the whole job */
            MPI_Abort(world, 1);
            return 1;
        }
        for (j = MEXP - 1; j>-1; j--){
            c = fgetc(fin);
            if (c == EOF)
                break;   /* short file: the remaining coefficients stay 0 */
            if (c == '1')
                set_coef(pf, j, 1);
        }
        fclose(fin);
    }
    MPI_Bcast(pf, P_SIZE, MPI_UNSIGNED_LONG, MASTER, world);
    if (color == 0 && myid > 0)
    {
        /* Give every worker its own stream: worker myid passes jump + myid - 1
         * to jump_ahead.  NOTE(review): the original comment suggests one unit
         * corresponds to 2^100 generator steps - confirm against jump_mt19937-aj.c. */
        jump_ahead(jump + myid - 1);
    }
    /* Worker nodes and Master node initialize their arrays */
    if (color == 0)
        initialize();
    /* Master node initializes its arrays for  portion  and  probability */
    if (myid == MASTER)
    {
        for (i=0; i < L; i++)
            portion[i] = 0;
        for (i=0; i < 6; i++)
            probability[i] = 0;
    }
    /* Main loop for Master Node */
    if (myid == MASTER)
    {
        /* Hand one sample to every participating worker */
        sample_request = 1;
        for (j = 1; (j < numprocs && (unsigned long int) j <= num_batch); j++)
        {
            MPI_Send(&sample_request, 1, MPI_INT, j, SAMPLE_REQUEST, world);
            batch_remain_sent--;
        }
        /* Each reply frees a worker; give it the next sample if any remain */
        while (batch_remain_received > 0)
        {
            MPI_Recv(&sample_request, 1, MPI_INT, MPI_ANY_SOURCE, SAMPLE_REPLY, world, &status);
            batch_remain_received--;
            if (batch_remain_sent > 0)
            {
                MPI_Send(&sample_request, 1, MPI_INT, status.MPI_SOURCE, SAMPLE_REQUEST, world);
                batch_remain_sent--;
            }
        }
        /* Send worker nodes a request to terminate computation */
        sample_request = 0;
        for (j = 1; (j < numprocs && (unsigned long int) j <= num_batch); j++)
            MPI_Send(&sample_request, 1, MPI_INT, j, SAMPLE_REQUEST, world);
    }
    /* Main loop for worker nodes */
    if (myid > 0 && color == 0)
    {
        MPI_Recv(&sample_request, 1, MPI_INT, MASTER, SAMPLE_REQUEST, world, &status);
        while (sample_request != 0)
        {
            vertex_indept(1);
            MPI_Send(&sample_request, 1, MPI_INT, MASTER, SAMPLE_REPLY, world);
            MPI_Recv(&sample_request, 1, MPI_INT, MASTER, SAMPLE_REQUEST, world, &status);
        }
    }

    /* Sum the tallies within each color group; in the color-0 group the
     * master has rank 0 and therefore receives the totals. */
    MPI_Reduce(box, total_box, 61*61*61, MPI_UNSIGNED_LONG, MPI_SUM, MASTER, workers);
    MPI_Reduce(vertices, total_vertices, L, MPI_UNSIGNED_LONG, MPI_SUM, MASTER, workers);
    MPI_Reduce(prob, total_prob, 6, MPI_UNSIGNED_LONG, MPI_SUM, MASTER, workers);
    MPI_Comm_free(&workers);
    /* Master node computes  portion  and  probability  and writes them to file */
    if (myid == MASTER)
    {
        for (i = 0; i < 6; i++)
            probability[i] = ((float) total_prob[i])/n;
        for (i = 0; i < L; i++)
            portion[i] = ((float) total_vertices[i])/n;

        /* A file that cannot be opened is reported and skipped; the
         * remaining files are still written. */
        portion_file = fopen("portion3d-256sink.txt", "w");
        if (portion_file == NULL)
            printf("Could not open portion file");
        else
        {
            for (i = 0; i < L; i++)
                fprintf(portion_file, "%f\n", portion[i]);
            fclose(portion_file);
        }

        probability_file = fopen("probability3d-256sink.txt", "w");
        if (probability_file == NULL)
            printf("Could not open probability file");
        else
        {
            for (i = 0; i < 6; i++)
                fprintf(probability_file, "%f\n", probability[i]);
            fclose(probability_file);
        }

        box_file = fopen("box3d-256sink.txt", "w");
        if (box_file == NULL)
            printf("Could not open box file");
        else
        {
            for (x = -30; x <= 30; x++)
                for (y = -30; y <= 30; y++)
                    for (z = -30; z <= 30; z++)
                        fprintf(box_file, "%d %d %d %lu\n", x, y, z, total_box[x+30][y+30][z+30]);
            fclose(box_file);
        }
    }
    free(pf);
    pf = NULL;
    MPI_Finalize();
    return 0;
}

