/*
 *	$Id: gmtstitch.c,v 1.17 2006/04/10 05:47:30 pwessel Exp $
 */
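/* gmtstitch will combine pieces of coastlines or similar segments into a
 * continuous line, polygon, or group of lines/polygons by joining pieces whose
 * endpoints lie within a specified threshold of each other, so that breaks are
 * left only where the jump between segment endpoints exceeds that threshold.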
 *
 * Paul Wessel, March, 2006, derived from our earlier GSHHS processing tools July, 1994.
 */

#include "gmt.h"

#define SEG_I	0
#define SEG_J	1
#define END_A	0
#define END_B	1

/* Nearest-neighbor bookkeeping for ONE end of an open segment */
struct BUDDY {
	int id, orig_id;	/* Nearest segment: its index in the seg array, and that segment's orig_id */
	int end_order;		/* Which end (0 = END_A, 1 = END_B) of the nearest segment is the closest one */
	double dist;		/* Distance to that end; starts out as DBL_MAX until a neighbor is found */
};

/* One open (not yet closed) input segment and what we know about its two ends */
struct LINK {
	int id, orig_id;		/* Index in the seg array, and running count of the segment among ALL input segments */
	int group, pos;			/* Table number and segment number within that table where the points live */
	int n;				/* Number of points (rows) in the segment */
	int used;			/* Set to TRUE once the segment has been written as part of an output chain */
	double x_end[2], y_end[2];	/* Coordinates of the first [0] and last [1] point */
	struct BUDDY buddy[2];		/* Closest neighboring end found for the first [0] and last [1] point */
};


/* Program entry point.
 *
 * 1. Parse the command line (-D, -M, -T plus the common GMT options).
 * 2. Read every input file into one table per file.
 * 3. Write segments whose first and last points coincide straight to the
 *    output and forget them; store end-point information for all others.
 * 4. Refuse to continue if two open segments are point-for-point duplicates.
 * 5. For each end of each open segment find the nearest end of any segment.
 * 6. Chain segments together as long as the nearest end is within the -T
 *    cutoff and belongs to a segment not yet written, and write each chain.
 *
 * Output goes to GMT_stdout, or to one file per output segment with -D.
 * Terminates via exit (): EXIT_FAILURE on usage/syntax/file-creation errors
 * or when duplicates are found, EXIT_SUCCESS otherwise.
 */
int main (int argc, char **argv)
{
	struct LINK *seg;
	int nearest_end[2][2], ii, end;
	int i, j, k, np, distance_flag = 0, n_files = 0, ns, id, pos, start_id, done, end_order;
	int n_new, n, chain = 0, n_islands = 0, n_trouble = 0, n_closed = 0, id2, L, G, error = 0;
	int n_alloc = GMT_SMALL_CHUNK, n_id_alloc = GMT_CHUNK, out_seg, match = 0, n_steps;
	BOOLEAN individual_file = FALSE, first;
	struct GMT_DATASET *D;
	double dd[2][2], p_dummy_x, p_dummy_y, p_last_x, p_last_y, p_first_x, p_first_y, distance, cutoff = 0.0;
	char format[BUFSIZ] = "", filename[BUFSIZ];	/* format starts out empty so we can tell if -D supplied a template */
	FILE *fp;
#ifdef DEBUG
	FILE *fp3;
#endif
	void Write_This_Segment (FILE *fp, struct GMT_LINE_SEGMENT *line, int start, int end);

	argc = GMT_begin (argc, argv);

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {

				/* Common parameters */

				case 'V':
				case 'H':
				case ':':
				case 'b':
				case 'f':
				case '\0':
					error += GMT_parse_common_options (argv[i], NULL, NULL, NULL, NULL);
					break;

				/* Supplemental parameters */

				case 'D':               /* Write each segment to a separate output file */
					if (argv[i][2]) strcpy (format, &argv[i][2]);
					individual_file = TRUE;
					break;
				case 'M':               /* Multiple line segments input */
					GMT_multisegment (&argv[i][2]);
					break;
				case 'T':	/* Cutoff distance; a trailing k or K selects how distances are computed */
					cutoff = GMT_getradius (&argv[i][2]);
					if (argv[i][strlen(argv[i])-1] == 'k') distance_flag = 1;
					if (argv[i][strlen(argv[i])-1] == 'K') distance_flag = 2;
					break;
				default:
					error = TRUE;
					GMT_default_error (argv[i][1]);
					break;
			}
		}
		else
			n_files++;
	}

	if (argc == 1 || GMT_give_synopsis_and_exit) {
		fprintf (stderr, "gmtstitch %s - Join individual lines whose end points match within tolerance\n\n", GMT_VERSION);
		fprintf (stderr, "usage: gmtstitch <infiles> [%s] [-D[<template>]] [%s] -T<cutoff>[m|c|k|K]\n", GMT_H_OPT, GMT_M_OPT);
		fprintf (stderr, "\t[-V[l]] [%s] [%s] [%s]\n\n", GMT_t_OPT, GMT_b_OPT, GMT_f_OPT);

		if (GMT_give_synopsis_and_exit) exit (EXIT_FAILURE);

		fprintf (stderr, "\tinfiles (in ASCII or binary) have 2 or more columns with (x,y) or (y,x) in first columns.\n");
		fprintf (stderr, "\t  If no file(s) is given, standard input is read.\n");
		fprintf (stderr, "\n\tOPTIONS:\n");
		fprintf (stderr, "\t-D writes individual segments to separate files [Default writes one multisegment file to stdout].\n");
		fprintf (stderr, "\t   Append file name template which MUST contain a C-format specified for an integer (e.g., %%d).\n");
		fprintf (stderr, "\t   [Default uses gmtstitch_segment_%%d.d]\n");
		GMT_explain_option ('H');
		GMT_explain_option ('M');
		GMT_explain_option ('V');
		fprintf (stderr, "\t-T sets cutoff distance in data units; append m or c for minutes or seconds.\n");
		fprintf (stderr, "\t   Append k for km (implies -fg), use flat Earth approximation.\n");
		fprintf (stderr, "\t   Append K for km (implies -fg), use exact geodesic distances.\n");
		fprintf (stderr, "\t   If the current ELLIPSOID is Sphere then spherical great circle distances are used.\n");
		fprintf (stderr, "\t   If two lines has endpoints that are closer than this cutoff they will be joined.\n");
		GMT_explain_option (':');
		GMT_explain_option ('i');
		GMT_explain_option ('n');
		fprintf (stderr, "\t   Default is 2 input columns\n");
		GMT_explain_option ('o');
		GMT_explain_option ('n');
		GMT_explain_option ('f');
		GMT_explain_option ('.');
		exit (EXIT_FAILURE);
	}

	if (n_files == 0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  No input files specified\n", GMT_program);
		error++;
	}
	if (cutoff < 0.0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  -T cutoff must be >= 0!\n", GMT_program);
		error++;
	}
	if (GMT_io.binary[GMT_IN] && GMT_io.io_header[GMT_IN]) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR.  Binary input data cannot have header -H\n", GMT_program);
		error++;
	}
	if (GMT_io.binary[GMT_IN] && GMT_io.ncol[GMT_IN] == 0) GMT_io.ncol[GMT_IN] = 2;
	if (GMT_io.binary[GMT_IN] && GMT_io.ncol[GMT_IN] < 2) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR.  Binary input data (-bi) must have at least %d columns\n", GMT_program, 2);
		error++;
	}
	if (error) exit (EXIT_FAILURE);

	GMT_put_history (argc, argv);	/* Update .gmtcommands4 */

	/* Now we are ready to take on some input values */

	/* format is an array, so we must test its first character (not its address) to see if -D gave a template */
	if (individual_file && !format[0]) strcpy (format, "gmtstitch_segment_%d.d");

	switch (distance_flag) {	/* Take different action depending on how we want distances calculated */
		case 0:		/* Cartesian distance */
			GMT_distance_func = GMT_cartesian_dist;
			break;
		case 1:		/* Flat Earth Approximation */
			GMT_distance_func = GMT_flatearth_dist_km;
			break;
		case 2:		/* Full spherical calculation */
			GMT_distance_func = GMT_great_circle_dist_km;
			break;
		case 3:		/* Full Ellipsoidal calculation */
			/* NOTE(review): the -T parsing above only ever sets distance_flag to 1 or 2, so this case is
			 * currently never selected even though the usage message promises geodesic distances for K. */
			GMT_distance_func = GMT_geodesic_dist_km;
			break;
	}

	/* Allocate memory and read in all the files; each file can have many lines (-M) */

	D = (struct GMT_DATASET *) GMT_memory (VNULL, 1, sizeof (struct GMT_DATASET), GMT_program);
	D->table = (struct GMT_TABLE **) GMT_memory (VNULL, n_alloc, sizeof (struct GMT_TABLE *), GMT_program);
	for (k = 1, i = 0; k < argc; k++) {
		if (argv[k][0] == '-') continue;	/* Skip the options; only file names are of interest here */

		if (gmtdefs.verbose) fprintf (stderr, "%s: Reading file %s\n", GMT_program, argv[k]);

		GMT_import_table ((void *)argv[k], GMT_IS_FILE, &D->table[i], 0.0, FALSE, FALSE, TRUE);
		i++;
		if (i == n_alloc) {	/* Need room for more tables */
			n_alloc += GMT_SMALL_CHUNK;
			D->table = (struct GMT_TABLE **) GMT_memory ((void *)D->table, n_alloc, sizeof(struct GMT_TABLE *), GMT_program);
		}
	}
	if (i < n_alloc) D->table = (struct GMT_TABLE **) GMT_memory ((void *)D->table, i, sizeof(struct GMT_TABLE *), GMT_program);
	D->n_tables = i;
	seg = (struct LINK *) GMT_memory (VNULL, n_id_alloc, sizeof(struct LINK), GMT_program);
	id = pos = ns = out_seg = 0;
	if (gmtdefs.verbose) fprintf (stderr, "%s: Check for closed polygons\n", GMT_program);

	/* Closed polygons are already finished - just identify, write out, and move on */

	ns = -1;	/* ns counts every input segment (closed or not); the first one gets number 0 */
	for (k = 0; k < D->n_tables; k++) {
		for (j = 0; j < D->table[k]->n_segments; j++) {
			np = D->table[k]->segment[j]->n_rows;
			ns++;
			distance = (GMT_distance_func) (D->table[k]->segment[j]->coord[0][0], D->table[k]->segment[j]->coord[1][0], D->table[k]->segment[j]->coord[0][np-1], D->table[k]->segment[j]->coord[1][np-1]);
			if (distance == 0.0) {	/* Already closed, just write out and forget in the rest of the program */
				if (individual_file) {
					sprintf (filename, format, out_seg);
					if ((fp = GMT_fopen (filename, GMT_io.w_mode)) == NULL ) {
						fprintf (stderr, "%s: Error creating file %s\n", GMT_program, filename);
						exit (EXIT_FAILURE);
					}
				}
				else
					fp = GMT_stdout;
#if DEBUG
				sprintf (GMT_io.segment_header, "> Segment %d\n", out_seg);
#endif
				if (!individual_file) GMT_write_segmentheader (fp, D->table[k]->segment[j]->n_columns);
				Write_This_Segment (fp, D->table[k]->segment[j], 0, np-1);
				if (individual_file) GMT_fclose (fp);
				n_islands++;
				out_seg++;
				continue;
			}

			/* Here we have a segment that is not closed.  Store refs to D->table and copy end points */

			seg[id].id = id;
			seg[id].orig_id = ns;
			seg[id].group = k;
			seg[id].pos = j;
			seg[id].n = np;
			seg[id].used = FALSE;	/* Set explicitly; do not rely on GMT_memory handing back zeroed storage when the array grows */
			seg[id].x_end[0] = D->table[k]->segment[j]->coord[0][0];
			seg[id].y_end[0] = D->table[k]->segment[j]->coord[1][0];
			seg[id].x_end[1] = D->table[k]->segment[j]->coord[0][np-1];
			seg[id].y_end[1] = D->table[k]->segment[j]->coord[1][np-1];
			seg[id].buddy[0].dist = seg[id].buddy[1].dist = DBL_MAX;	/* No neighbor found yet */
			id++;
			if (id == n_id_alloc) {	/* Need room for more open segments */
				n_id_alloc += GMT_CHUNK;
				seg = (struct LINK *) GMT_memory ((void *)seg, n_id_alloc, sizeof(struct LINK), GMT_program);
			}
		}
	}
	ns = id;	/* From here on ns is the number of open segments only */
	if (ns == 0) {	/* All are closed */
		fprintf (stderr, "%s: All segments already form closed polygons\n", GMT_program);
		exit (EXIT_SUCCESS);
	}

	if (ns < n_id_alloc) seg = (struct LINK *) GMT_memory ((void *)seg, ns, sizeof(struct LINK), GMT_program);

	if (gmtdefs.verbose) fprintf (stderr, "%s: Found %d closed polygons\n", GMT_program, n_islands);

	/* The algorithm will be confused if there are identical duplicates of segments - thus we check */

	if (gmtdefs.verbose) fprintf (stderr, "%s: Check for duplicate lines\n", GMT_program);
	for (i = 0; i < ns; i++) {
		for (j = i + 1; j < ns; j++) {
			/* Only bother with a point-by-point comparison if some pair of end points coincide */
			if ((seg[i].x_end[0] == seg[j].x_end[0] && seg[i].y_end[0] == seg[j].y_end[0]) ||
			    (seg[i].x_end[0] == seg[j].x_end[1] && seg[i].y_end[0] == seg[j].y_end[1]) ||
			    (seg[i].x_end[1] == seg[j].x_end[0] && seg[i].y_end[1] == seg[j].y_end[0]) ||
			    (seg[i].x_end[1] == seg[j].x_end[1] && seg[i].y_end[1] == seg[j].y_end[1])) {
				if (seg[i].n == seg[j].n) {
					/* Count matching points; the k == match test stops the loop at the first mismatch */
					for (k = match = 0; k < seg[i].n && k == match; k++) {
						match += (D->table[seg[i].group]->segment[seg[i].pos]->coord[0][k] == D->table[seg[j].group]->segment[seg[j].pos]->coord[0][k] &&
						          D->table[seg[i].group]->segment[seg[i].pos]->coord[1][k] == D->table[seg[j].group]->segment[seg[j].pos]->coord[1][k]);
					}
					match = (match == seg[i].n) ? 1 : 0;
					if (match) {
						fprintf (stderr, "%s: Segments %d and %d are duplicates - eliminate before using gmtstitch\n", GMT_program, i, j);
						exit (EXIT_FAILURE);
					}
				}
			}
		}
	}

	if (gmtdefs.verbose) fprintf (stderr, "%s: Calculate and rank end point separations\n", GMT_program);

	/* We determine the distance from each segments two endpoints to the two endpoints on every other
	 * segment; this is four distances per segment.  We then assign the nearest endpoint to each end
	 * of a segment to the buddy structure which keeps the id of the nearest segment so far.
	 *
	 * Note on indexing: dd[a][b] holds the distance from end a of segment i to end b of segment j,
	 * i.e., when SEG_I/SEG_J appear as the FIRST index of dd they select end A/B of segment i.
	 */

	for (i = 0; i < ns; i++) {

		for (j = i; j < ns; j++) {
			/* nearest_end indicates which end is closest to this end */
			if (i == j) {	/* Store offset between the endpoints of a single segment (should be 0 if closed) */
				dd[SEG_I][END_A] = dd[SEG_J][END_B] = DBL_MAX;	/* An end can never be its own neighbor */
				dd[SEG_I][END_B] = dd[SEG_J][END_A] = (GMT_distance_func) (seg[i].x_end[END_A], seg[i].y_end[END_A], seg[i].x_end[END_B], seg[i].y_end[END_B]);
				nearest_end[SEG_I][END_A] = nearest_end[SEG_J][END_A] = END_B;
				nearest_end[SEG_J][END_B] = nearest_end[SEG_I][END_B] = END_A;
			}
			else {	/* Store the distances between the 4 possible end-to-end configurations */
				dd[SEG_I][END_A] = (GMT_distance_func) (seg[i].x_end[END_A], seg[i].y_end[END_A], seg[j].x_end[END_A], seg[j].y_end[END_A]);
				dd[SEG_I][END_B] = (GMT_distance_func) (seg[i].x_end[END_A], seg[i].y_end[END_A], seg[j].x_end[END_B], seg[j].y_end[END_B]);
				dd[SEG_J][END_A] = (GMT_distance_func) (seg[i].x_end[END_B], seg[i].y_end[END_B], seg[j].x_end[END_A], seg[j].y_end[END_A]);
				dd[SEG_J][END_B] = (GMT_distance_func) (seg[i].x_end[END_B], seg[i].y_end[END_B], seg[j].x_end[END_B], seg[j].y_end[END_B]);
				for (end = 0; end < 2; end++) nearest_end[SEG_I][end] = (dd[end][END_A] < dd[end][END_B]) ? END_A : END_B;
				for (end = 0; end < 2; end++) nearest_end[SEG_J][end] = (dd[END_A][end] < dd[END_B][end]) ? END_A : END_B;
			}
			/* Update list of closest matches for both ends */
			for (ii = 0; ii < 2; ii++) {	/* For each end of the segment */
				end = nearest_end[SEG_I][ii];	/* The end of segment j that was closest to segment i's end ii */
				if (dd[ii][end] < seg[i].buddy[ii].dist) {	/* This distance is shorter than the previous shortest distance */
					seg[i].buddy[ii].orig_id = seg[j].orig_id;
					seg[i].buddy[ii].id = j;
					seg[i].buddy[ii].dist = dd[ii][end];
					seg[i].buddy[ii].end_order = end;
				}
				end = nearest_end[SEG_J][ii];	/* The end of segment i that was closest to segment j's end ii */
				if (dd[end][ii] < seg[j].buddy[ii].dist) {	/* This distance is shorter than the previous shortest distance */
					seg[j].buddy[ii].orig_id = seg[i].orig_id;
					seg[j].buddy[ii].id = i;
					seg[j].buddy[ii].dist = dd[end][ii];
					seg[j].buddy[ii].end_order = end;
				}
			}
		}
	}

#ifdef DEBUG
	fp3 = fopen ("link.dat", "w");
	for (i = 0; i < ns; i++) fprintf (fp3, "%d\t%d\t%d\t%d\t%g\t%d\t%d\t%g\n", i, seg[i].orig_id, seg[i].buddy[0].orig_id, seg[i].buddy[0].end_order, seg[i].buddy[0].dist, seg[i].buddy[1].orig_id, seg[i].buddy[1].end_order, seg[i].buddy[1].dist);
	fclose (fp3);
#endif
	start_id = done = 0;
	p_dummy_x = p_dummy_y = DBL_MAX;	/* Stand-in for "no previous point" */

	if (gmtdefs.verbose) fprintf (stderr, "%s: Assemble new segments\n", GMT_program);
	while (!done) {

		/* Find the 'beginning' of the chain that this segment belongs to by tracing the connections
		 * until we either reappear at the starting point (a closed loop) or we reach an end (i.e.,
		 * the nearest next endpoint is beyond the separation threshold. */

		done = FALSE;
		id = start_id;
		end_order = n_steps = 0;
#if DEBUG
		if (gmtdefs.verbose) fprintf (stderr, "%d\n", seg[id].orig_id);
#endif
		while (!done && seg[id].buddy[end_order].dist <= cutoff && !seg[seg[id].buddy[end_order].id].used) {
			id2 = seg[id].buddy[end_order].id;
#if DEBUG
			if (gmtdefs.verbose) fprintf (stderr, "%d\n", seg[id2].orig_id);
#endif
			if (id2 == start_id)	/* Closed polygon, start here */
				done = TRUE;
			if (id2 == id || n_steps > ns) {	/* Not good... */
				done = TRUE;
				n_trouble++;
			}
			else {	/* Trace the connection to the next segment */
				end_order = !seg[id].buddy[end_order].end_order;	/* Continue from the far end of the neighbor */
				id = id2;
			}
			n_steps++;
		}

		/* This id should be the beginning of a segment.  Now trace forward and dump out the chain */

		/* First dump start segment */

		start_id = id;

		memset (GMT_io.segment_header, 0, BUFSIZ);
		if (individual_file) {
			sprintf (filename, format, out_seg);
			if ((fp = GMT_fopen (filename, GMT_io.w_mode)) == NULL ) {
				fprintf (stderr, "%s: Error creating file %s\n", GMT_program, filename);
				exit (EXIT_FAILURE);
			}
		}
		else {
			fp = GMT_stdout;
#if DEBUG
			sprintf (GMT_io.segment_header, "> Segment %d\n", out_seg);
#endif
			GMT_write_segmentheader (fp, D->table[seg[id].group]->segment[seg[id].pos]->n_columns);
		}

		p_first_x = p_last_x = p_dummy_x;
		p_first_y = p_last_y = p_dummy_y;
		n_new = 0;
		k = 0;	/* Counts the pieces that make up this output segment */
		done = FALSE;
		first = TRUE;
		do {
			G = seg[id].group;
			L = seg[id].pos;
			np = seg[id].n;
			if (end_order == 0) {	/* Already in the right order */
				if (D->table[G]->segment[L]->coord[0][0] == p_last_x && D->table[G]->segment[L]->coord[1][0] == p_last_y) {	/* Skip duplicate anchor point */
					j = 1;
					n = np - 1;
				}
				else {	/* We need all the points */
					j = 0;
					n = np;
				}
				Write_This_Segment (fp, D->table[G]->segment[L], j, np-1);
				p_last_x = D->table[G]->segment[L]->coord[0][np-1];
				p_last_y = D->table[G]->segment[L]->coord[1][np-1];
				if (first) p_first_x = D->table[G]->segment[L]->coord[0][0], p_first_y = D->table[G]->segment[L]->coord[1][0];
			}
			else {	/* Must reverse the segment's order of points */
				if (D->table[G]->segment[L]->coord[0][np-1] == p_last_x && D->table[G]->segment[L]->coord[1][np-1] == p_last_y) {	/* Skip duplicate anchor point */
					j = 1;
					n = np - 1;
				}
				else {	/* We need all the points */
					j = 0;
					n = np;
				}
				Write_This_Segment (fp, D->table[G]->segment[L], np-1-j, 0);
				p_last_x = D->table[G]->segment[L]->coord[0][0];
				p_last_y = D->table[G]->segment[L]->coord[1][0];
				if (first) p_first_x = D->table[G]->segment[L]->coord[0][np-1], p_first_y = D->table[G]->segment[L]->coord[1][np-1];
			}
			first = FALSE;
			n_new += n;
			end_order = !end_order;	/* We are now at the other end of the piece just written */
			seg[id].used = TRUE;
			if (seg[id].buddy[end_order].dist <= cutoff && !seg[seg[id].buddy[end_order].id].used) {
				/* Not done, trace into the next connecting segment */
				id2 = seg[id].buddy[end_order].id;
				end_order = seg[id].buddy[end_order].end_order;
				done = (id2 == start_id || id2 == id);
				id = id2;
			}
			else	/* End of the chain for this output segment */
				done = TRUE;
			k++;
		} while (!done);
		if (individual_file) GMT_fclose (fp);
		if (gmtdefs.verbose) fprintf (stderr, "%s: Segment %d made from %d pieces\n", GMT_program, out_seg, k);

		if (p_first_x == p_last_x && p_first_y == p_last_y) n_closed++;

		chain++;
		out_seg++;

		/* Wind to the next unused segments to start the connection search again */
		start_id = 0;
		while (start_id < ns && seg[start_id].used) start_id++;
		done = (start_id == ns);	/* No more unused segments */
	}

	fprintf (stderr, "%s: Segments in: %d Segments out: %d\n", GMT_program, ns + n_islands, chain + n_islands);
	if (n_trouble) fprintf (stderr, "%s: %d trouble spots\n", GMT_program, n_trouble);
	if (n_closed) fprintf (stderr, "%s: %d new closed segments\n", GMT_program, n_closed);
	if (n_islands) fprintf (stderr, "%s: %d were already closed\n", GMT_program, n_islands);

	GMT_free ((void *)seg);
	GMT_free_dataset (D);

	exit (EXIT_SUCCESS);
}

/* Write the rows start through end (both inclusive) of the given segment to fp,
 * one record per row via GMT_output.  If start < end the rows are written in
 * increasing order, otherwise in decreasing order (start == end writes one row).
 */
void Write_This_Segment (FILE *fp, struct GMT_LINE_SEGMENT *line, int start, int end)
{
	static double record[BUFSIZ];	/* Holds one output row; sized for up to BUFSIZ columns */
	int row = start, col;
	int step = (start < end) ? +1 : -1;

	for (;;) {
		/* Data are stored column by column, so gather this row's values first */
		for (col = 0; col < line->n_columns; col++) record[col] = line->coord[col][row];
		GMT_output (fp, line->n_columns, record);
		if (row == end) break;	/* The end row is written before we stop */
		row += step;
	}
}
