#include "block.h"

#include "block_io.h"
#include "script.h"
#include "utility.h"

/* Working buffers supplied by the caller (see process_blocks):
   store receives the blocks of the chunk being worked on,
   row_data holds the single row handed to the script commands. */
static float *store;
static float *row_data;

/* copy of file_info->have_output */
static Bool have_output;

/* total number of dimensions, and how many of them are / are not
   referred to by any script (counted in init_arrays) */
static int ndim;
static int ndim_process;
static int ndim_rest;

/* dimension numbers of the processed dims and of the remaining dims,
   plus the per-dimension flag they are derived from */
static int dim_process[MAX_NDIM];
static int dim_rest[MAX_NDIM];
static Bool dim_processed[MAX_NDIM];

/* the scripts to run, as passed to process_blocks */
static int nscripts;
static Script *scripts;

/* number of chunks (product of input block counts over the non-processed
   dims) and the loop bounds/direction used to walk them in block_process */
static int nchunks;
static int begin_chunk;
static int end_chunk;
static int step_chunk;

/* offsets into store used while copying a row (see store_to_row):
   row_jump1 is added after every point, row_jump2 additionally after every
   block_size[dim] points; block_start and row_start locate the first point */
static int row_jump1;
static int row_jump2;
static int block_start;
static int row_start;

/* size_of_block is the total produced by CUMULATIVE over block_size
   (presumably the number of points in one block);
   the other values are block/row counts used as loop limits */
static int size_of_block;
static int nblocks_per_input;
static int nblocks_per_output;
static int nblocks_to_do;
static int nrows_to_do;

/* these point into the caller's Size_info */
static int *block_size;
static int *npoints_in;

/* per-dimension point counts: final output size, size after the scripts
   run so far, and the larger of input and output size;
   chunk_blocks is the block coordinate currently addressed,
   nblock_points the number of points used in that block,
   block_point the point coordinate of the current row inside the block */
static int npoints_out[MAX_NDIM];
static int npoints_proc[MAX_NDIM];
static int npoints_max[MAX_NDIM];
static int chunk_blocks[MAX_NDIM];
static int nblock_points[MAX_NDIM];
static int block_point[MAX_NDIM];

/* per-dimension block counts (in / out / current / max) and the
   cumulative tables used with ARRAY_OF_INDEX and INDEX_OF_ARRAY;
   the cum_blocks_proc_*, cum_blocks_to_do tables are indexed by position
   in dim_process, cum_blocks_rest by position in dim_rest */
static int nblocks_in[MAX_NDIM];
static int nblocks_out[MAX_NDIM];
static int nblocks_proc[MAX_NDIM];
static int nblocks_max[MAX_NDIM];
static int cum_points_in[MAX_NDIM];
static int cum_blocks_in[MAX_NDIM];
static int cum_blocks_out[MAX_NDIM];
static int cum_blocks_proc_in[MAX_NDIM];
static int cum_blocks_proc_out[MAX_NDIM];
static int cum_blocks_proc_max[MAX_NDIM];
static int cum_blocks_rest[MAX_NDIM];
static int cum_block_size[MAX_NDIM];
static int cum_blocks_to_do[MAX_NDIM];
static int cum_rows_to_do[MAX_NDIM];

/* scratch coordinate vector shared by several routines */
static int array[MAX_NDIM];

/* input and output file state handed to the block I/O routines */
static Block_IO block_io_in;
static Block_IO block_io_out;

/* for Varian processing */
/* have_varian, dim_order and block_header are copied from File_info;
   cum_varian1 / cum_varian2 are the tables applied in disk_to_row to
   array1 (coordinate / 2) and array2 (coordinate % 2) respectively */
static Bool have_varian;
static int *dim_order;
static int block_header;
static int cum_varian1[MAX_NDIM];
static int cum_varian2[MAX_NDIM];
static int array1[MAX_NDIM];
static int array2[MAX_NDIM];

static void init_arrays()
{
    int i, j, k, m, n;

    for (i = 0; i < ndim; i++)
	dim_processed[i] = FALSE;

    for (n = 0; n < nscripts; n++)
    {
	for (i = 0; i < scripts[n].ndim; i++)
	{
	    j = scripts[n].dims[i];
	    dim_processed[j] = TRUE;
	}
    }

    ndim_process = ndim_rest = 0;
    for (i = 0; i < ndim; i++)
    {
	if (dim_processed[i])
	{
	    dim_process[ndim_process] = i;
	    ndim_process++;
	}
	else
	{
	    dim_rest[ndim_rest] = i;
	    ndim_rest++;
	}
    }

    COPY_VECTOR(npoints_out, npoints_in, ndim);

    for (i = 0; i < ndim; i++)
	npoints_max[i] = npoints_in[i];

    for (n = 0; n < nscripts; n++)
    {
	j = scripts[n].dims[0];
	npoints_out[j] = scripts[n].npts[0];
	npoints_max[j] = MAX(npoints_max[j], npoints_out[j]);
    }

    BLOCKS(nblocks_in, npoints_in, block_size, ndim);
    BLOCKS(nblocks_out, npoints_out, block_size, ndim);
    BLOCKS(nblocks_max, npoints_max, block_size, ndim);

    CUMULATIVE(cum_points_in, npoints_in, n, ndim);
    CUMULATIVE(cum_blocks_in, nblocks_in, n, ndim);
    CUMULATIVE(cum_blocks_out, nblocks_out, n, ndim);
    CUMULATIVE(cum_block_size, block_size, size_of_block, ndim);

    nblocks_per_input = 1;
    nblocks_per_output = 1;
    n = 1;

    for (i = 0; i < ndim_process; i++)
    {
	j = dim_process[i];

	cum_blocks_proc_in[i] = nblocks_per_input;
	cum_blocks_proc_out[i] = nblocks_per_output;
	cum_blocks_proc_max[i] = n;

	nblocks_per_input *= nblocks_in[j];
	nblocks_per_output *= nblocks_out[j];
	n *= nblocks_max[j];
    }

    nchunks = 1;

    for (i = 0; i < ndim_rest; i++)
    {
	j = dim_rest[i];
	cum_blocks_rest[i] = nchunks;
	nchunks *= nblocks_in[j];
		/* Note: for these j, nblocks_in == nblocks_out */
    }

    if ((nblocks_per_output > nblocks_per_input) &&
				(block_io_in.file == block_io_out.file))
    {
	begin_chunk = nchunks - 1;  /* work from the end of the file */
	end_chunk = -1;
	step_chunk = -1;
    }
    else
    {
	begin_chunk = 0;  /* work from the beginning of the file */
	end_chunk = nchunks;
	step_chunk = 1;
    }

    if (have_varian)
    {
	cum_varian1[0] = cum_varian2[0] = 0; /* arbitrary */

	m = n = npoints_in[0] + block_header;
	n *= 1 << (ndim - 1);

	for (i = 1; i < ndim; i++)
	{
	    j = dim_order[i];

	    cum_varian1[i] = n;
	    k = npoints_in[i] / 2;
	    n *= k;

	    cum_varian2[j] = m;
	    m *= 2;
	}
    }
}

/*
 * Set up the non-processed dimensions for chunk number `chunk':
 * records the block coordinate of the chunk in chunk_blocks and the
 * number of points used in that block in nblock_points.
 */
static void init_chunk(int chunk)
{
    int pos;

    /* chunk number -> one block coordinate per non-processed dimension */
    ARRAY_OF_INDEX(array, chunk, cum_blocks_rest, ndim_rest);

    for (pos = 0; pos < ndim_rest; pos++)
    {
        const int dim = dim_rest[pos];
        const int coord = array[pos];

        chunk_blocks[dim] = coord;

        if (coord != nblocks_in[dim] - 1)
            nblock_points[dim] = block_size[dim];
        else  /* last block, which may be only partly filled */
            nblock_points[dim] = 1 + (npoints_proc[dim] - 1) % block_size[dim];
    }
}

/*
 * Read input block number `block' of the current chunk from block_io_in
 * into its slot in store.
 *
 * block      index of the block within the chunk (input block layout)
 * error_msg  receives "block <n>: " followed by whatever read_file_block
 *            reports
 *
 * Returns OK, or leaves via CHECK_STATUS if the read fails.
 */
static Status disk_to_store(int block, String error_msg)
{
    int pos, disk_block, store_block;
    float *dest;

    sprintf(error_msg, "block %d: ", block);
    error_msg += strlen(error_msg);  /* lower-level text is appended here */

    /* block number -> block coordinates over the processed dimensions */
    ARRAY_OF_INDEX(array, block, cum_blocks_proc_in, ndim_process);

    /* slot in store is laid out using the maximum block counts */
    INDEX_OF_ARRAY(store_block, array, cum_blocks_proc_max, ndim_process);
    dest = &store[size_of_block * store_block];

    /* merge with the chunk's coordinates to get the block number on disk */
    for (pos = 0; pos < ndim_process; pos++)
        chunk_blocks[dim_process[pos]] = array[pos];

    INDEX_OF_ARRAY(disk_block, chunk_blocks, cum_blocks_in, ndim);

    CHECK_STATUS(read_file_block(&block_io_in, disk_block, dest, error_msg));

    return  OK;
}

/*
 * Write output block number `block' of the current chunk from its slot
 * in store to block_io_out.
 *
 * block      index of the block within the chunk (output block layout)
 * error_msg  receives "block <n>: " followed by whatever write_file_block
 *            reports
 *
 * Returns OK, or leaves via CHECK_STATUS if the write fails.
 */
static Status store_to_disk(int block, String error_msg)
{
    int pos, disk_block, store_block;
    float *src;

    sprintf(error_msg, "block %d: ", block);
    error_msg += strlen(error_msg);  /* lower-level text is appended here */

    /* block number -> block coordinates over the processed dimensions */
    ARRAY_OF_INDEX(array, block, cum_blocks_proc_out, ndim_process);

    /* slot in store is laid out using the maximum block counts */
    INDEX_OF_ARRAY(store_block, array, cum_blocks_proc_max, ndim_process);
    src = &store[size_of_block * store_block];

    /* merge with the chunk's coordinates to get the block number on disk */
    for (pos = 0; pos < ndim_process; pos++)
        chunk_blocks[dim_process[pos]] = array[pos];

    INDEX_OF_ARRAY(disk_block, chunk_blocks, cum_blocks_out, ndim);

    CHECK_STATUS(write_file_block(&block_io_out, disk_block, src, error_msg));

    return  OK;
}

/*
 * Prepare for running one script along dimension `dim'.
 *
 * Computes nblocks_to_do and cum_blocks_to_do (the processed dimension
 * itself does not contribute a factor), the two row jumps used when
 * walking a row through store, and records npts as the output size of
 * the dimension.
 */
static void init_process(int npts, int dim)
{
    int pos;

    nblocks_to_do = 1;

    for (pos = 0; pos < ndim_process; pos++)
    {
        cum_blocks_to_do[pos] = nblocks_to_do;

        if (dim_process[pos] == dim)
        {
            /* step between points inside a block, and the extra step
               that leads from the end of one block into the next one */
            row_jump1 = cum_block_size[dim];
            row_jump2 = size_of_block * cum_blocks_proc_max[pos]
                                        - block_size[dim] * row_jump1;
        }
        else
        {
            nblocks_to_do *= nblocks_proc[dim_process[pos]];
        }
    }

    npoints_out[dim] = npts;
}

/*
 * Finish a script along dimension `dim': the output size becomes the
 * current size, and the current block count is recomputed from it.
 */
static void end_process(int dim)
{
    const int npts = npoints_out[dim];

    npoints_proc[dim] = npts;
    nblocks_proc[dim] = 1 + (npts - 1) / block_size[dim];
}

/*
 * Select block number `block' (counted over the processed dimensions
 * other than `dim') for processing along dimension `dim'.
 *
 * Sets block_start, the block coordinates and point counts of the
 * processed dimensions, and nrows_to_do / cum_rows_to_do.
 */
static void init_block(int dim, int block)
{
    int pos, axis, store_block;

    ARRAY_OF_INDEX(array, block, cum_blocks_to_do, ndim_process);
    INDEX_OF_ARRAY(store_block, array, cum_blocks_proc_max, ndim_process);
    block_start = size_of_block * store_block;

    for (pos = 0; pos < ndim_process; pos++)
    {
        const int cur = dim_process[pos];

        chunk_blocks[cur] = array[pos];  /* needed for no_block_process */

        if (cur == dim)
            nblock_points[cur] = 1;  /* a row covers this dimension itself */
        else if (array[pos] != nblocks_proc[cur] - 1)
            nblock_points[cur] = block_size[cur];
        else  /* last block, which may be only partly filled */
            nblock_points[cur] = 1 + (npoints_proc[cur] - 1) % block_size[cur];
    }

    /* rows to do = product of the point counts over all dimensions */
    nrows_to_do = 1;

    for (axis = 0; axis < ndim; axis++)
    {
        cum_rows_to_do[axis] = nrows_to_do;
        nrows_to_do *= nblock_points[axis];
    }
}

/*
 * Select row number `row' of the current block (0 <= row < nrows_to_do
 * as set up by init_block).  Fills block_point and row_start.
 *
 * The two macros are defined elsewhere; presumably ARRAY_OF_INDEX splits
 * an index into per-dimension coordinates using the cumulative table and
 * INDEX_OF_ARRAY does the reverse -- confirm against their definitions.
 */
static void init_row(int row)
{
    /* row number -> point coordinates within the block */
    ARRAY_OF_INDEX(block_point, row, cum_rows_to_do, ndim);
    /* point coordinates -> offset of the row's first point in the block */
    INDEX_OF_ARRAY(row_start, block_point, cum_block_size, ndim);
}

/*
 * Read the current row (as selected by init_block / init_row) directly
 * from the non-blocked input file into row_data.  npoints_proc[0] values
 * are requested.
 *
 * row        row number, used only for the error message
 * error_msg  receives "row <n>: " followed by whatever read_file_blocks
 *            reports
 *
 * Returns OK, or leaves via CHECK_STATUS if the read fails.
 */
static Status disk_to_row(int row, String error_msg)
{
    int dim, disk_point;
    int npts = npoints_proc[0];

    sprintf(error_msg, "row %d: ", row);
    error_msg += strlen(error_msg);

    /* absolute point coordinates of the start of the row */
    for (dim = 0; dim < ndim; dim++)
        array[dim] = chunk_blocks[dim]*block_size[dim] + block_point[dim];

    if (!have_varian)
    {
        INDEX_OF_ARRAY(disk_point, array, cum_points_in, ndim);
    }
    else
    {
        int offset1, offset2;

        /* each coordinate is split into (coordinate / 2) and
           (coordinate % 2), which use separate offset tables */
        for (dim = 0; dim < ndim; dim++)
        {
            array1[dim] = array[dim] / 2;
            array2[dim] = array[dim] % 2;
        }

        INDEX_OF_ARRAY(offset1, array1, cum_varian1, ndim);
        INDEX_OF_ARRAY(offset2, array2, cum_varian2, ndim);
        disk_point = offset1 + offset2 + block_header;
    }

    CHECK_STATUS(read_file_blocks(&block_io_in, disk_point, npts, row_data, error_msg));

/*  could do complex conjugation for Varian data, but leave out for
    now because it's possible that D1 has real, not complex data  */

    return  OK;
}

/*
 * Gather the current row along dimension `dim' from store into row_data.
 * npoints_proc[dim] points are copied.  Within a block successive points
 * are row_jump1 apart; after each complete block of block_size[dim]
 * points (except the final stretch) row_jump2 is added as well.
 */
static void store_to_row(int dim)
{
    const int bsize = block_size[dim];
    const int npts = npoints_proc[dim];
    const int last_start = npts - bsize;
    int pos = block_start + row_start;
    int pt = 0;
    int stop;

    /* every block except the final stretch */
    while (pt < last_start)
    {
        for (stop = pt + bsize; pt < stop; pt++)
        {
            row_data[pt] = store[pos];
            pos += row_jump1;
        }

        pos += row_jump2;
    }

    /* whatever is left (at most one block) */
    while (pt < npts)
    {
        row_data[pt] = store[pos];
        pos += row_jump1;
        pt++;
    }
}

/*
 * Scatter row_data back into store as the current row along dimension
 * `dim'.  npoints_out[dim] points are copied.  Within a block successive
 * points are row_jump1 apart; after each complete block of
 * block_size[dim] points (except the final stretch) row_jump2 is added
 * as well.
 */
static void row_to_store(int dim)
{
    const int bsize = block_size[dim];
    const int npts = npoints_out[dim];
    const int last_start = npts - bsize;
    int pos = block_start + row_start;
    int pt = 0;
    int stop;

    /* every block except the final stretch */
    while (pt < last_start)
    {
        for (stop = pt + bsize; pt < stop; pt++)
        {
            store[pos] = row_data[pt];
            pos += row_jump1;
        }

        pos += row_jump2;
    }

    /* whatever is left (at most one block) */
    while (pt < npts)
    {
        store[pos] = row_data[pt];
        pos += row_jump1;
        pt++;
    }
}

/*
 * Run every command of script number `n', in order, on row_data.
 * Each command is invoked through its do_process pointer with its own
 * code value.
 */
static void process_script(int n)
{
    const int ncommands = scripts[n].ncommands;
    int idx = 0;

    while (idx < ncommands)
    {
        Command *cmd = &scripts[n].commands[idx];

        cmd->do_process(cmd->code, row_data);
        idx++;
    }
}

/*
 * Process blocked input chunk by chunk, in the order given by
 * begin_chunk / end_chunk / step_chunk.
 *
 * For every chunk: read all of its input blocks into store, run each
 * script over all rows of its dimension (writing the row back only when
 * the script has output), and, if there is an output file, write all
 * output blocks of the chunk.
 *
 * error_msg receives "chunk <n>, " followed by the lower-level message
 * if a block read or write fails.  Returns OK on success.
 */
static Status block_process(String error_msg)
{
    int chunk, blk, row, scr, dim, npts;
    int progress = 0;
    char *tail;

    for (chunk = begin_chunk; chunk != end_chunk; chunk += step_chunk)
    {
        progress++;
        printf("\t... working on chunk %d of %d\n", progress, nchunks);
        FLUSH;

        /* every chunk starts again from the input sizes */
        COPY_VECTOR(npoints_proc, npoints_in, ndim);
        COPY_VECTOR(nblocks_proc, nblocks_in, ndim);

        init_chunk(chunk);

        sprintf(error_msg, "chunk %d, ", chunk);
        tail = error_msg + strlen(error_msg);

        for (blk = 0; blk < nblocks_per_input; blk++)
        {
            CHECK_STATUS(disk_to_store(blk, tail));
        }

        for (scr = 0; scr < nscripts; scr++)
        {
            dim = scripts[scr].dims[0];
            npts = scripts[scr].npts[0];

            init_process(npts, dim);

            for (blk = 0; blk < nblocks_to_do; blk++)
            {
                init_block(dim, blk);

                for (row = 0; row < nrows_to_do; row++)
                {
                    init_row(row);
                    store_to_row(dim);
                    process_script(scr);

                    if (scripts[scr].output)
                        row_to_store(dim);
                }
            }

            end_process(dim);
        }

        if (!have_output)
            continue;

        /* input and output on the same file share the last_done state */
        if (block_io_in.file == block_io_out.file)
            block_io_out.last_done = block_io_in.last_done;

        sprintf(error_msg, "chunk %d, ", chunk);
        tail = error_msg + strlen(error_msg);

        for (blk = 0; blk < nblocks_per_output; blk++)
        {
            CHECK_STATUS(store_to_disk(blk, tail));
        }

        if (block_io_in.file == block_io_out.file)
            block_io_in.last_done = block_io_out.last_done;
    }

    return  OK;
}

/*
 * Process non-blocked input chunk by chunk, writing blocked output.
 *
 * For every chunk:
 *   1. the rows along dimension 0 are read straight from the input file
 *      (disk_to_row), script 0 is run on each of them, and the result is
 *      placed in store;
 *   2. the remaining scripts (1 .. nscripts-1) are run on store exactly
 *      as in block_process;
 *   3. all output blocks of the chunk are written.
 *
 * Script 0 is taken to act on dimension 0 (its output size
 * scripts[0].npts[0] is used for that dimension); the caller is expected
 * to guarantee this and that nscripts >= 1.
 *
 * error_msg receives the chunk (and block) position followed by the
 * lower-level message if a read or write fails.  Returns OK on success.
 */
static Status no_block_process(String error_msg)
{
    int i, j, k, l, m, n, npts;
    char *msg;

    for (i = 0; i < nchunks; i++)
    {
        printf("\t... working on chunk %d of %d\n", i+1, nchunks);
        FLUSH;

        /* every chunk starts again from the input sizes */
        COPY_VECTOR(npoints_proc, npoints_in, ndim);
        COPY_VECTOR(nblocks_proc, nblocks_in, ndim);

        init_chunk(i);

        /* stage 1: script 0, rows taken from disk instead of from store */
        k = 0;
        npts = scripts[0].npts[0];

        init_process(npts, k);

        for (l = 0; l < nblocks_to_do; l++)
        {
            init_block(k, l);

            sprintf(error_msg, "chunk %d, block %d, ", i, l);
            msg = error_msg + strlen(error_msg);

            for (m = 0; m < nrows_to_do; m++)
            {
                init_row(m);
                CHECK_STATUS(disk_to_row(m, msg));

                /* run script 0 on the row just read; without this its
                   commands would never be executed, since the loop below
                   starts at script 1 */
                process_script(0);

                /* always stored: this is what moves the data into store */
                row_to_store(k);
            }
        }

        /* commit the new size of dimension 0 so that the following
           scripts see the processed point and block counts, as
           block_process does after every script */
        end_process(k);

        /* stage 2: the remaining scripts work on store */
        for (n = 1; n < nscripts; n++)
        {
            k = scripts[n].dims[0];
            npts = scripts[n].npts[0];

            init_process(npts, k);

            for (l = 0; l < nblocks_to_do; l++)
            {
                init_block(k, l);

                for (m = 0; m < nrows_to_do; m++)
                {
                    init_row(m);
                    store_to_row(k);
                    process_script(n);

                    if (scripts[n].output)
                        row_to_store(k);
                }
            }

            end_process(k);
        }

        /* stage 3: write the chunk */
        sprintf(error_msg, "chunk %d, ", i);
        msg = error_msg + strlen(error_msg);

        for (j = 0; j < nblocks_per_output; j++)
            CHECK_STATUS(store_to_disk(j, msg));
    }

    return  OK;
}

/*
 * Entry point: run `n' one-dimensional scripts `s' over the data
 * described by size_info / file_info, using the buffers in store_info.
 *
 * size_info   dimensions, block sizes and point counts; npoints is
 *             overwritten with the output point counts when there is
 *             output
 * store_info  supplies the block store and the one-row work buffer
 * file_info   input/output files and format flags; when there is output
 *             it is updated so that it describes the output file as the
 *             new input
 * n, s        number of scripts and the scripts themselves
 * error_msg   receives a message on failure
 *
 * Returns OK on success.  Fails (via RETURN_ERROR_MSG / CHECK_STATUS) if
 * non-blocked data has no output, if Varian data is blocked, if any
 * script has more than one dimension, or if block I/O fails.
 */
Status process_blocks(Size_info *size_info, Store_info *store_info,
			File_info *file_info, int n,
			Script *s, String error_msg)
{
    int scr;

    /* keep the caller's settings in the file-scope state */
    scripts = s;
    nscripts = n;

    ndim = size_info->ndim;
    npoints_in = size_info->npoints;
    block_size = size_info->block_size;

    row_data = store_info->work;
    store = store_info->store;

    have_output = file_info->have_output;

    block_header = file_info->block_header;
    dim_order = file_info->dim_order;
    have_varian = file_info->have_varian;

    /* reject combinations this routine cannot handle */
    if (!have_output && !(file_info->blocked))
    {
        RETURN_ERROR_MSG("non-blocked data must have output in process_blocks");
    }

    if (have_varian && file_info->blocked)
    {
        RETURN_ERROR_MSG("varian data must be non-blocked in process_blocks");
    }

    for (scr = 0; scr < nscripts; scr++)
    {
        if (scripts[scr].ndim > 1)
        {
            RETURN_ERROR_MSG("must have 1-dim. scripts in process_blocks");
        }
    }

    /* init_arrays compares the two file handles, so set them first */
    block_io_in.file = file_info->file_in;
    block_io_out.file = file_info->file_out;

    init_arrays();

    /* input side */
    block_io_in.name = file_info->input_file;
    block_io_in.byte_size = file_info->byte_size;
    block_io_in.integer = file_info->integer;
    block_io_in.swapped = file_info->swapped;
    block_io_in.deflated = FALSE;
    block_io_in.directory = (int *) NULL;
    block_io_in.dir_size = 0;
    block_io_in.header = file_info->header;

    if (have_varian)
        block_io_in.header += file_info->file_header;

    /* output side; size_of_block was computed by init_arrays */
    block_io_out.name = file_info->output_file;
    block_io_out.deflated = FALSE;
    block_io_out.block_size = size_of_block;

    CHECK_STATUS(init_block_read(&block_io_in, error_msg));
    CHECK_STATUS(init_block_write(&block_io_out, error_msg));

    if (!(file_info->blocked))
    {
        /* non-blocked input is addressed point by point */
        block_io_in.block_size = 1;
        CHECK_STATUS(no_block_process(error_msg));
    }
    else
    {
        block_io_in.block_size = size_of_block;
        CHECK_STATUS(block_process(error_msg));
    }

    if (!have_output)
        return  OK;

    /* the output now plays the role of the input for later stages */
    COPY_VECTOR(npoints_in, npoints_out, ndim);

    file_info->input_file = file_info->output_file;
    file_info->swapped = determine_swapped();
    file_info->integer = FALSE;
    file_info->blocked = TRUE;
    file_info->header = 0;

    file_info->have_varian = FALSE;
    file_info->byte_size = BYTES_PER_WORD;

    return  OK;
}
