/*need
    ba2 is
    ntax                 => ntax
    number of topologies => num_topology
    array of topologies  => topology
    cutpoint             => cutpoint
    array of names       => taxon_name
    */
#include <stdio.h>
#include <ctype.h>
#include <time.h>
#include <string.h>
#include <assert.h>

#define FIFTEEN_BITS_MAX 500000

int find_name(s1,s2)
char *s1, *s2;
{
int i,j,found;

for (i=0;i< strlen(s2)-strlen(s1);i++) {
	found=1;
	for (j=0;j<strlen(s1);j++) {
		if (s2[i+j]!=s1[j]) {
			found=0;
			break;
			}
		}
	if (found) return(i);
	}
return 0;
}


void get_taxon_number_here(i_start,j_start,in_buffer,out_buffer,names,ntax)
char **names;
int *i_start, *j_start;
char *in_buffer;
int *out_buffer,ntax;
{
int i,name_found;
char *name_buffer;

name_buffer=(char *)malloc(50*sizeof(char));
assert((int)name_buffer);

i=0;
while (isalnum(in_buffer[(*i_start)+i])) {
	name_buffer[i]=in_buffer[(*i_start)+i];
	++i;
	}
name_buffer[i]='\0';
*i_start+=i;
if (i>50) {
	fprintf(stderr,"Sequence name too long.\n");
	exit(-1);
	}
name_found=0;
for (i=0;i<ntax;i++) {
	if (!strcmp(name_buffer,names[i])) {
		out_buffer[*j_start]=i;
		*j_start+=1;
		if (name_found==0) name_found=1;
		else {
			fprintf(stderr,"Sequence name (%s) repeated in groups representation.\n",names[i]);
			exit(-1);
			}
		}
	}
free(name_buffer);
}

int print_jack2(best_aligns,count,taxon_name,ntax,final_groups,counter2)
char *best_aligns;
int count,ntax;
int **final_groups, counter2;
char **taxon_name;
{
int i,j,counter,pos;
char *buf;


	printf("%d %d\n",counter2,ntax);
/*dry run of data file*/
for (i=0;i<ntax;i++) {
  printf("%s ",taxon_name[i]);
  for (j=0;j<counter2;j++) printf("%d",final_groups[j][i]);
  printf("\n");
}
        /* for (i = 0; i < 2; i++) {
			for (j=0;j<strlen(taxon_name[i]);j++) printf("%c",taxon_name[i][j]);
			printf(" 0\n");
			}
		for (i = 2; i < ntax; i++) {
			for (j=0;j<strlen(taxon_name[i]);j++) printf("%c",taxon_name[i][j]);
			printf(" 1\n");
			}*/

	printf(";\n");
	printf("cc- 0.%d;\n",counter2-1);
	/*printf("tread\n");

		buf=(char *)malloc((1+strlen(best_aligns))*sizeof(char));
		assert((int) buf);
		counter=0;
		pos=0;
		for (j=0;j<strlen(best_aligns);j++) {
			if (best_aligns[j]=='(') buf[pos++]=best_aligns[j];
			else if ((best_aligns[j]!=')') && (best_aligns[j]!=' ')) {
				buf[pos++]=best_aligns[j];
				counter++;
				}
			else if ((best_aligns[j]==')') || (best_aligns[j]==' ')) {
				buf[pos++]=best_aligns[j];
				counter=0;
				}
		}
		
		for (j=0;j<pos;j++) printf("%c",buf[j]);
		if (i<(count-1)) printf("*\n");
		else printf(";\n");
		free(buf);
*/
	printf("\nproc /	;\n");

return 1;
}


int **get_groups4(buffer,ntax,names,ngroups)
char *buffer;
int ntax;
char **names;
int *ngroups;
{
int i,j,*buffer2,paren_check_left,paren_check_right;
int rep_length,groups_marker,groups_number;
int **groups;

i=((*ngroups))=0;

/*
fprintf(stderr,"%s\n",buffer);
*/
paren_check_left=paren_check_right=0;
for (i=0;i<strlen(buffer);i++) {
	if (buffer[i]=='(') ++paren_check_left;
	else if (buffer[i]==')') ++paren_check_right;
	}
if (paren_check_left!=paren_check_right) {
	fprintf(stderr,"Parentheses do not match in groups specification.\n");
	exit(-1);
	}

buffer2=(int *)malloc((strlen(buffer)+1)*sizeof(int));
assert((int)buffer2);
for (i=0;i<(strlen(buffer)+1);i++) buffer2[i]=-32000;
j=0;
for (i=0;i<strlen(buffer);i++)
	{
	if (buffer[i]=='(') buffer2[j++]=-1;
	else if (buffer[i]==')') buffer2[j++]=-2;
	else if (buffer[i]==',') buffer2[j++]=-3;
	else if (isspace(buffer[i])) buffer2[j++]=-3; /*can get rid of extra commas and terminal spaces laterif need to */
	else if (isalnum(buffer[i])) {
		get_taxon_number_here(&i,&j,buffer,buffer2,names,ntax);
		--i;
		}
	else {
		fprintf(stderr,"Bad character |%c| in position %d of groups description.\n",buffer[i],i);
		exit(-1);
		}
	}
buffer2[i]=-4;
i=0;
while (buffer2[i]>-4) {
	if (buffer2[i]==-2) rep_length=i+1;
	i++;
	}

/*
for (i=0;i<rep_length;i++) fprintf(stderr,"%d,",buffer2[i]);
fprintf(stderr,"\n");
*/

/*the real stuff*/
(*ngroups)=paren_check_left;
groups=(int **)malloc((*ngroups)*sizeof(int *));
assert((int)groups);
for (i=0;i<paren_check_left;i++) {
	groups[i]=(int *)malloc((ntax+1)*sizeof(int));
	assert((int)groups[i]);
	for (j=0;j<ntax+1;j++) groups[i][j]=0;
	}

groups_number=0;
for (i=0;i<rep_length;i++) {
	if (buffer2[i]==-1) {
		groups_marker=0;
		for (j=i;j<rep_length;j++) {
			if (buffer2[j]==-1) ++groups_marker;
			if ((buffer2[j]>=0)&&(buffer2[j]<ntax)) groups[groups_number][buffer2[j]]=1;
			if (buffer2[j]==-2) {
				--groups_marker;
				if (groups_marker==0) break;
				}
			}
		++groups_number;
		}
	}

for (i=0;i<(*ngroups);i++) {
	for (j=0;j<ntax;j++) groups[i][ntax]+=groups[i][j];
	if (groups[i][ntax]==0) {
		fprintf(stderr,"Error in groups specification -- parenthesis pair which contains no sequences.\n");
		exit(-1);
		}
	/*
	for (j=0;j<=ntax;j++) fprintf(stderr," %d ",groups[i][j]);
	fprintf(stderr,"\n");*/
	}
/*fprintf(stderr,"\n");*/

/*if (values->VERBOSE) fprintf(stderr,"%d groups required\n",(*ngroups));*/
free(buffer2);
return groups;
}

int get_jack_groups(ntax,num_topologies,topology,cutpoint,taxon_name)
int ntax, num_topologies, cutpoint;
char **taxon_name, **topology;
{
int i,j,k,num_there=0;
int counter=0,ngroups;
int **groups;
int **big_groups, *num_of_each;
int same,counter2,counter3,in,l,cur_len;
int **final_groups,d1,d2,first;
char *group_names,*temp1;
char *ba;

groups=NULL;
big_groups=NULL;
num_of_each=NULL;
final_groups=NULL;
group_names=NULL;
temp1=NULL;


big_groups=(int **)malloc(sizeof(int *));
assert((int) big_groups);
big_groups[0]=(int *)malloc((ntax+1)*sizeof(int));
assert((int) big_groups[0]);

for (i=0;i<num_topologies;i++) {
	groups=get_groups4(topology[i],ntax,taxon_name,&ngroups);
	big_groups=(int **)realloc(big_groups,(counter+ngroups)*sizeof(int *));
	assert((int) big_groups);
	for (j=counter;j<counter+ngroups;j++) {
		big_groups[j]=(int *)malloc((ntax+1)*sizeof(int));
		assert((int) big_groups[j]);
		for (k=0;k<ntax+1;k++) big_groups[j][k]=groups[j-counter][k];
		free(groups[j-counter]);
		}
	free(groups);
	counter+=ngroups;
	}
fprintf(stderr,"There were a total of %d groups in %d trees\n",counter,num_topologies);
num_of_each=(int *)malloc(counter*sizeof(int));
assert((int) num_of_each);
for (i=0;i<counter;i++) num_of_each[i]=1;
for (i=0;i<counter;i++) if (num_of_each[i]>0) {
	for (j=i+1;j<counter;j++) if (num_of_each[j]>0) {
		same=1;
		for (k=0;k<ntax+1;k++) if (big_groups[i][k]!=big_groups[j][k]) {same=0; break;}
		if (same) {
			num_of_each[i]++;
			num_of_each[j]=0;
			}
		}
	}
counter2=0;
for (i=0;i<counter;i++) {
	if (big_groups[i][ntax]==ntax) num_of_each[i]=0;
	if (num_of_each[i]>=((cutpoint*num_topologies/100))) ++counter2;
	}
if (counter2>0) {
	final_groups=(int **)malloc(counter2*sizeof(int *));
	assert((int) final_groups);
	for (i=0;i<counter2;i++) {
		final_groups[i]=(int *)malloc((ntax+1)*sizeof(int));
		assert((int) final_groups[i]);
		}
	}
/*start hennig output*/
printf("xread\n");
printf("'Tree generated by Jackbooting ala Farris with %d replicates\n",num_topologies);
counter2=0;
for (i=0;i<counter;i++) if (num_of_each[i]>=((cutpoint*num_topologies)/100)) counter2++;
printf("There were %d suported (>=%d%% replicates) groups of %d possible and non-trivial\n",counter2,cutpoint,ntax-2);
fprintf(stderr,"There were %d suported (>=%d%% replicates) groups of %d possible and non-trivial\n",counter2,cutpoint,ntax-2);

if (counter2>0) {
	counter2=0;
	for (i=0;i<counter;i++) {
		if (num_of_each[i]>=((cutpoint*num_topologies)/100)) {
			printf("     Group %d %d%% (",counter2,(num_of_each[i]*100)/num_topologies);
			fprintf(stderr,"     Group %d %d%% (",counter2,(num_of_each[i]*100)/num_topologies);
			counter3=0;
			for (j=0;j<ntax;j++) {
				final_groups[counter2][j]=big_groups[i][j];
				if (big_groups[i][j]) {
					++counter3;
					fprintf(stderr,"%s",taxon_name[j]);
					if (counter3<big_groups[i][ntax]) fprintf(stderr," ");
					printf("%s",taxon_name[j]);
					if (counter3<big_groups[i][ntax]) printf(" ");
					}
				}
			final_groups[counter2][ntax]=big_groups[i][ntax];
			fprintf(stderr,")\n");
			printf(")\n");
			counter2++;
			}
		}
	}

/*gets names of groups*/
group_names=(char *)malloc(FIFTEEN_BITS_MAX*sizeof(char));
assert((int)group_names);
temp1=(char *)malloc(FIFTEEN_BITS_MAX*sizeof(char));
assert((int)temp1);
counter3=1;
group_names[0]='(';
for (i=0;i<ntax;i++) {
	for (j=0;j<strlen(taxon_name[i]);j++) group_names[counter3++]=taxon_name[i][j];
	group_names[counter3++]=' ';
	if (i==(ntax-1)) group_names[counter3++]=')';
	}
group_names[counter3++]='\0';
/*fprintf(stderr,"%s\n",group_names);*/
if (counter2>0) {
	for (i=ntax-1;i>1;i--) {
		for (j=0;j<counter2;j++) if (final_groups[j][ntax]==i) { /*add in and paren*/
			/*remove names in new group from group_name*/
			first=-1;
			for (k=0;k<ntax;k++) if (final_groups[j][k]) {
				/*remove name*/
				in=find_name(taxon_name[k],group_names);
				/*fprintf(stderr,"In %d ",in);*/
				if (!in) {
					fprintf(stderr,"error in making name--can't find %s in %s\n",taxon_name[k],group_names);
					exit(-1);
					}
				cur_len=strlen(group_names);
				for (l=in;l<cur_len-strlen(taxon_name[k])-1;l++) group_names[l]=group_names[l+strlen(taxon_name[k])+1];
				group_names[cur_len-strlen(taxon_name[k])-1]='\0';
				/*if ((cur_len-(strlen(taxon_name[k])-1)) != strlen(group_names)) fprintf(stderr,"Problems in termination\n");*/
				if (first<0) first = in;
				/*fprintf(stderr,"%d %s\n",k,group_names);*/
				}
			/*copy end to temp*/
			/*for (k=first;k<strlen(group_names);k++) temp1[k-first]=group_names[k];
			temp1[k]='\0';*/
			strcpy(temp1,group_names+first);
			/*fprintf(stderr,"gn %s\ntemp1 %s\n",group_names,temp1);*/
			/*add in parens and names*/
			/*fix for group stuff*/
			group_names[first++]='(';
			for (k=0;k<ntax;k++) if (final_groups[j][k]) {
				for (l=0;l<strlen(taxon_name[k]);l++) group_names[first++]=taxon_name[k][l];
				group_names[first++]=' ';
				}
			group_names[first++]=')';
			group_names[first++]=' ';
			for (k=0;k<strlen(temp1);k++) group_names[first++]=temp1[k];
			group_names[first]='\0';
			/*fprintf(stderr,"%s\n",group_names);*/
			}
		}
	}

/*filter spaces*/
ba=(char *)malloc((1+strlen(group_names))*sizeof(char));
assert((int) ba);
counter3=0;
for (i=0;i<strlen(group_names);i++){
	if (!isspace(group_names[i])) ba[counter3++]=group_names[i];
	else if (isspace(group_names[i]) && (group_names[i+1]!=')')) ba[counter3++]=group_names[i];
	}
ba[counter3]='\0';
ba=(char *)realloc(ba,(1+strlen(ba))*sizeof(char));
assert((int) ba);

fprintf(stderr,"%s\n",ba);
printf("%s'\n",ba);
if (counter2>0) counter3=print_jack2(ba,1,taxon_name,ntax,final_groups,counter2);

/*values->acgt=values->farris=values->inter_dig=values->dot=values->hen_gap=values->paup=values->paup_dot=0;*/
if (num_of_each) free(num_of_each);
if (group_names) free(group_names);
if (temp1) free(temp1);
if (big_groups) {
	for (i=0;i<counter;i++) if (big_groups[i]) free(big_groups[i]);
	free(big_groups);
	}
if (final_groups) {
	for (i=0;i<counter2;i++) if (final_groups[i]) free(final_groups[i]);
	free(final_groups);
	}
return num_there;
}


main (argc, argv)
int argc;
char *argv[];
{
    int x;
    int ntax,num_topologies,cutpoint;
    char **topology, **taxon_name;
    int i,j,k,dont;
    char *temp,hold, *t;

    if (argc < 2) {fprintf(stderr,"Need and argument\n");exit(-1);}
    cutpoint=atoi(argv[1]);
    num_topologies=ntax=0;
    taxon_name=NULL;

    topology=(char **)malloc(1024*sizeof(char *));
    assert((int) topology);
    taxon_name=(char **)malloc(1024*sizeof(char *));
    assert((int) taxon_name);
    for (i=0;i<1024;i++) {
        taxon_name[i]=(char *)malloc(128*sizeof(char));
        assert((int) taxon_name[i]);
    }
    temp=(char *)malloc(FIFTEEN_BITS_MAX*sizeof(char));
    assert((int) temp);
    /*read in topologies*/
    i=j=0;

/*get topologies*/
    hold='a';
    dont=1;
    while (hold != ';')	{
	    hold=getchar();
	    if (hold == '(') dont=1;
	    if (dont) temp[i++]=hold;
	    /*printf("%c",hold);*/
	    if ((hold== '*') || (hold=='[')) {
	        temp[i-1]='\0';
	        /*printf("%s(%d)==>",temp,strlen(temp));*/
            topology[j]=(char *)malloc((1+strlen(temp))*sizeof(char));
            assert((int) topology[j]);
            /*topology[j]=(char *)strcpy (topology[j],temp);*/
            for (k=0;k<strlen(temp);k++) topology[j][k]=temp[k];
            topology[j][k]='\0';
            /*printf("%s\n",topology[j]);*/
            i=0;
            ++j;
            dont=0;
            }
	    }
    num_topologies=j;
    /*for (i=0;i<num_topologies;i++) printf("%s\n",topology[i]);*/
/*get ntax and taxon_names*/
    /*base on first topology*/
    t=topology[0];
    ntax=0;
    for (i=0;i<strlen(t);) {
        if (t[i]=='(') {i++;}
        else if (t[i]==')') {i++;}
        else if (isspace(t[i])) {i++;}
        else {
            /*begin name of taxon*/
            k=0;
            while ((t[i]!=')') && (t[i]!='(') && (!isspace(t[i]))) taxon_name[ntax][k++]=t[i++];
            taxon_name[ntax][k]='\0';
            ++ntax;
            }
    }
fprintf(stderr,"There are %d topologies of %d taxa\n",num_topologies,ntax);
for (i=0;i<ntax;i++) fprintf (stderr,"%s ",taxon_name[i]);
fprintf(stderr,"\n");
/*get the real stuff*/
    x=get_jack_groups(ntax,num_topologies,topology,cutpoint,taxon_name);
    free(temp);
    return 0;
}
