#!/usr/bin/env python2.3

# Command-line help text for this script.  The __main__ block prints it when
# getopt rejects the command line.
# NOTE(review): the text is not fully in sync with the option parser in the
# __main__ block (for example -M, -d and -v are parsed there but are not
# described here) -- confirm against the parser before relying on it.
usage_info="""
Generate a sequence set from given models.

Usage: estimation_with_replications.py [options] <input_data_file.txt>
Options:
    -h          Print this help message.
    -c number   Number of Components (default 1)
    -e C|M      Type of estimation (default M)
                C  - clustering
		CM - constrained mixture
                LM - labeled mixture
		M  - mixture (default)
		K  - k-means
    -E          Calculate cluster validation statistics from data labels
                If labels are given in the second column of the dataset
    -i R|K|M|L  Initialization method
                R - random initialization (default)
		K - k-means initialization
		L1 - with labels (equal sizes)
                L2 - with labels (sampled)
                L3 - labels from file
		M <model_file.smo> - given model initialization
    -I          Identifier for repetitions (default 0)
    -l number   Percentage of labels to be use (number/1000)
                Only used in the case that original labels are used
    -D number   Dimension of multivariates
    -L file     Label file
                if not is included, all  labels from the original data set are used
    -m file     Model file (only with conjuntion with -i M)
    -n          Output statistics for number finding the number of components (AIC,BIC,IBIC)
    -p float    Prior for positive constraints learning (should be used with -t)
    -P float    Prior for negative constraints learning (should be used with -T)    
    -r number   Number of Repetitions (default 30)
    -s number   Number of states in the random models (default 2)
                If states is equal to data lengh, mixture of gaussians are used.
    -S          Seed for random initializations (default 0)
    -o file     Initial name scheme for all output file
    -t file     Positive Constraints file
    -T file     Negative Constraints file
    -V number   Peform cross validation (number/100) (not fully implemented)


Example: python estimation_with_replications.py -c 2 -r 30 -e M -i R tiny.txt
         python estimation_with_replications.py -c 5 -s 17 -r 1 -o 'aux' -e CM -t 'cho5_re.txt-RL-1-0.05-0.0.const' -p 3000 -m 'cho5-RRRFilterNorm-p-' cho5_re.txt 
Notes:
 * Requires the ghmm and mixture.py modules 
 * If the sequence file already exists, the new sequence set is appended.
"""

import GQLMixture
import GQLCluster
import GQLValidation
import ViterbiDecomposition
import numpy as Numeric
import getopt
from random import *
from GQLEvaluation import *
import matplotlib.mlab as MLab
import ghmm
from GO.GODag import *
from PPI.PPI import *
import copy
import mixture


def constraintDistribution(cluster,constraints,no):
    """Summarise how pairwise constraint weights fall within/between clusters.

    Parameters:
      cluster     -- sequence of cluster indices (0 .. no-1), one per item
      constraints -- square matrix indexed as constraints[i][j]
      no          -- number of clusters

    Returns a 7-tuple (a, b index clusters, i, j index items):
      0. resb: (no, no) matrix; resb[a][b] is the sum of constraints[i][j]
         over ordered pairs i != j with cluster[i] == a and cluster[j] == b
      1. resb with every column divided by its column total (+1e-5),
         transposed
      2. resb[a][b] / (size_a * size_b), rescaled so all entries sum to 1
      3. resb2: (no, 2) matrix; column 0 is the weight inside cluster a,
         column 1 the weight from cluster a to all other clusters
      4. resb2 with row a divided by the column total of resb for a (+1e-5)
      5. resb2 with column 0 divided by the size of cluster a and column 1
         divided by the number of items outside cluster a
      6. (no, 2) matrix built from resb * row_totals / size**2: column 0 is
         its diagonal, column 1 the column sums of its off-diagonal part

    Empty clusters lead to divisions by zero in items 2 and 5, exactly as
    the plain NumPy divisions dictate (inf/nan entries).
    """
    size = len(cluster)

    # builtin float instead of the removed Numeric.float (numpy.float) alias
    resw = Numeric.zeros(no, float)           # items per cluster
    resb = Numeric.zeros((no, no), float)     # cluster-to-cluster weight
    resb2 = Numeric.zeros((no, 2), float)     # [within, between] per cluster

    for i in range(size):
        ci = cluster[i]
        resw[ci] += 1
        for j in range(size):
            if i == j:
                continue
            cj = cluster[j]
            weight = constraints[i][j]
            resb[ci][cj] += weight
            if ci == cj:
                resb2[ci][0] += weight
            else:
                resb2[ci][1] += weight

    sumc = Numeric.sum(resb, axis=1)
    # small offset keeps the column normalisation finite for empty columns
    suml = Numeric.sum(resb, axis=0) + 0.00001

    # row 0: cluster sizes, row 1: number of items outside each cluster
    resw2 = Numeric.array([resw, size - resw], float)

    scaled = resb * sumc / (size * size)
    scaled_diag = Numeric.diagonal(scaled)
    off_diagonal = scaled - Numeric.eye(no, no) * scaled_diag
    neg_resw = Numeric.transpose([scaled_diag, Numeric.sum(off_diagonal, axis=0)])

    density = Numeric.transpose(Numeric.transpose(resb) / resw) / resw

    return (resb,
            Numeric.transpose(resb / suml),
            density / Numeric.sum(density),
            resb2,
            Numeric.transpose(Numeric.transpose(resb2) / suml),
            Numeric.transpose(Numeric.transpose(resb2) / resw2),
            neg_resw)


def joinDouble(mat,separator):
  """Return the str() of every element of mat joined by separator.

  An empty mat yields an empty string (the previous implementation raised
  IndexError for it).
  """
  return separator.join([str(value) for value in mat])

def printTable(table):
    """Format a 2-D table of integers as text and return the string.

    The output is a header line with the 1-based column numbers (each in a
    4-character field), a dashed rule, and then one line per row made of the
    1-based row number, " |" and the row values in 4-character fields.

    table -- sequence of rows (nested lists or a 2-D array).

    The previous implementation ignored its argument and read an undefined
    global named contigencyTable; the table passed in is used now.
    """
    n_cols = 0
    if len(table) > 0:
        n_cols = len(table[0])

    header = "".join(["%4d" % (col + 1) for col in range(n_cols)])
    rule = "----" * n_cols
    pieces = [header + " \n" + rule + "\n"]

    for row_no in range(len(table)):
        cells = "".join(["%4d" % value for value in table[row_no]])
        pieces.append("%4d |" % (row_no + 1) + cells + "\n")
    return "".join(pieces)

class Usage(Exception):
    """Raised for command-line usage errors.

    msg is kept on the instance as .msg and is also handed to Exception so
    that str(exc) and an uncaught traceback show the message.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

class random_models_args:
    """Plain attribute container passed to replications() as `model`.

    number_of_states, number_of_models and total_duration may be given
    explicitly.  Any of them left as None is read from the module-level
    names nr_states, num_clusters and duration respectively (the original
    behaviour; the __main__ block defines those names before building an
    instance).  The remaining attributes are fixed constants.
    """
    def __init__(self, number_of_states=None, number_of_models=None, total_duration=None):
        if number_of_states is None:
            number_of_states = nr_states
        if number_of_models is None:
            number_of_models = num_clusters
        if total_duration is None:
            total_duration = duration

        self.number_of_states = number_of_states
        self.number_of_models = number_of_models
        self.total_duration = total_duration
        self.parameters = 1
        self.default_variance = 1.0
        self.default_mean = 9.0
        self.noiseModel = 0
        self.dimension = 1


def set_random_partial_labels_equal(profileClustering,percentageOfLabels,labelsClasses):
    """Pick the same number of randomly chosen labelled items from every class.

    The number of labels per class is
    int(len(profileSet) * percentageOfLabels / number_of_classes), at least 1.

    profileClustering  -- object with a profileSet attribute; its .partials
                          (list with the chosen members of each class) and
                          .partial_label (dict: member -> class index) are
                          replaced
    percentageOfLabels -- fraction of the whole data set to label
    labelsClasses      -- list of classes, each a list of member ids

    A class with fewer members than the requested number contributes all of
    its members.  (The previous code drew indices from range(k) in that case
    and failed with IndexError.)
    """
    profileClustering.partials = []
    profileClustering.partial_label = {}

    no_classes = len(labelsClasses)
    no_labels = len(profileClustering.profileSet)*percentageOfLabels
    k = int(no_labels/no_classes)
    if k == 0:
        k = 1

    print("%s %s %s %s" % (no_classes, no_labels, percentageOfLabels, k))

    for class_index, members in enumerate(labelsClasses):
        # never ask for more items than the class holds
        picked = sample(range(len(members)), min(k, len(members)))
        chosen = [members[pos] for pos in picked]
        profileClustering.partials.append(chosen)
        for member in chosen:
            profileClustering.partial_label[member] = class_index
    print("%s %s" % ("labels", profileClustering.partials))

def set_random_partial_labels(profileClustering,percentageOfLabels,labelsClasses):
    """Pick a random fraction of every class as labelled items.

    From each class int(len(class) * percentageOfLabels) members are drawn
    without replacement, at least one when the class is not empty.

    profileClustering  -- object whose .partials (list with the chosen
                          members of each class) and .partial_label
                          (dict: member -> class index) are replaced
    percentageOfLabels -- fraction of each class to label
    labelsClasses      -- list of classes, each a list of member ids

    An empty class contributes an empty list.  (The previous code asked
    random.sample for one item of an empty range and failed with
    ValueError.)
    """
    profileClustering.partials = []
    profileClustering.partial_label = {}

    for class_index, members in enumerate(labelsClasses):
        print("%s %s" % (len(members), percentageOfLabels))
        k = int(len(members)*percentageOfLabels)
        if k == 0:
            k = 1
        print("%s %s" % (members, k))
        # min() guards the empty-class case, where k was forced up to 1
        picked = sample(range(len(members)), min(k, len(members)))
        chosen = [members[pos] for pos in picked]
        profileClustering.partials.append(chosen)
        for member in chosen:
            profileClustering.partial_label[member] = class_index
    print("%s %s" % ("labels", profileClustering.partials))


def replications(profileSet, model,repetitions, initializationMethod,estimationMethod,inputmodel,perlabels,labelFile,
                 constr,neg_constraints,prior_weight,prior_neg_weight,prior_type,maxIter):
    """Run the estimation `repetitions` times and return the resulting profiles.

    Parameters:
      profileSet           -- GQLCluster.ProfileSet with the data; its
                              seq_classes is read for 'L1'/'L2' and its
                              ghmm_seqs for estimation method 'C'
      model                -- settings object (see random_models_args) handed
                              to randomModels(); model.number_of_models is the
                              number of clusters
      repetitions          -- number of successful runs to collect
      initializationMethod -- 'R', 'K', 'L1', 'L2' or 'L3'; any other value
                              reads the models from `inputmodel`
      estimationMethod     -- 'LM', 'M', 'C', 'CM', 'K' or 'H'
      inputmodel           -- model file read with readModels(inputmodel,'xml')
      perlabels            -- label fraction for 'L1'/'L2'
      labelFile            -- partial assignment file for 'L3'
      constr               -- positive constraints (used by 'CM')
      neg_constraints      -- negative constraints (used by 'CM')
      prior_weight         -- prior for positive constraints (used by 'CM')
      prior_neg_weight     -- prior for negative constraints (used by 'CM')
      prior_type           -- accepted for interface compatibility; not used
      maxIter              -- value stored in max_iter of every profile

    Returns the list of GQLCluster.ProfileClustering objects, one per
    successful repetition, each with .clusters filled from .cluster.

    Changes against the previous version: the 'CM' branch uses the `constr`
    parameter (it read a module global named `constraints`), the cluster
    lists are sized from model.number_of_models (it read the module global
    `num_clusters`), and the block of commented-out legacy estimation code
    was dropped.

    NOTE(review): 'L1' calls set_random_partial_labels and 'L2' calls
    set_random_partial_labels_equal, which looks swapped relative to the
    help text ("L1 - equal sizes", "L2 - sampled") -- confirm which is
    intended.  The mapping is left as it was.
    NOTE(review): a repetition that raises OverflowError is retried without
    any limit, so a run that always overflows never terminates.
    """
    profiles = []

    print('replications')

    if initializationMethod in ('L1', 'L2'):
        # group item indices by their class; the labels are used as 1-based
        labels = profileSet.seq_classes
        labelsClasses = [[] for _ in range(max(labels))]
        for index, label in enumerate(labels):
            labelsClasses[label - 1].append(index)

    n_models = model.number_of_models
    done = 0
    while done < repetitions:
        try:
            currentProfile = GQLCluster.ProfileClustering()
            currentProfile.setProfileSet(profileSet)

            print('before initialization')

            # only the mixture/clustering estimators need initial models
            if estimationMethod in ('LM', 'C', 'CM', 'M'):
                if initializationMethod == 'R':
                    currentProfile.randomModels(model)
                    currentProfile.estimateFromRandomWeightsExternal()
                elif initializationMethod == 'K':
                    # estimate from kmeans results
                    currentProfile.randomModels(model)
                elif initializationMethod in ('L1', 'L2', 'L3'):
                    print('init %s' % initializationMethod)
                    currentProfile.randomModels(model)
                    print(currentProfile.models)
                    if initializationMethod == 'L1':
                        set_random_partial_labels(currentProfile, perlabels, labelsClasses)
                    elif initializationMethod == 'L2':
                        set_random_partial_labels_equal(currentProfile, perlabels, labelsClasses)
                    else:
                        currentProfile.ReadPartialAssignment(labelFile)
                    # estimate from random assigments
                    currentProfile.estimateFromPartialAssignment()
                else:
                    currentProfile.readModels(inputmodel, 'xml')

            print('after')

            currentProfile.max_iter = maxIter
            currentProfile.eps = 0.1

            if estimationMethod == 'LM':
                currentProfile.computeClustering('PartiallySupervisedMixture')
            elif estimationMethod == 'M':
                currentProfile.computeClustering('MixtureExt')
            elif estimationMethod == 'C':
                (currentProfile.ml,
                 currentProfile.cluster,
                 currentProfile.p) = GQLMixture.estimate_clustering(currentProfile.modelList(),
                                                                    profileSet.ghmm_seqs,
                                                                    10, 0.01, currentProfile.fixed_models)
            elif estimationMethod == 'CM':
                currentProfile.prior = prior_weight
                currentProfile.prior_neg = prior_neg_weight
                currentProfile.constraints = [constr, neg_constraints]
                currentProfile.computeClustering('MixtureConstrained')
            elif estimationMethod == 'K':
                currentProfile.no_clusters = n_models
                currentProfile.no_repetitions = 1
                currentProfile.computeClustering('Kmeans')
            elif estimationMethod == 'H':
                currentProfile.no_clusters = n_models
                currentProfile.no_repetitions = 1
                currentProfile.computeClustering('Hierarchical')
                print(currentProfile.P)

            # invert the item -> cluster assignment into per-cluster lists
            currentProfile.clusters = [[] for _ in range(n_models)]
            for item, assigned in enumerate(currentProfile.cluster):
                currentProfile.clusters[assigned].append(item)

            profiles.append(currentProfile)
            done += 1
        except OverflowError:
            # the counter is not advanced, so this repetition is run again
            print("repetition failed %i" % done)
            print(OverflowError)
    return profiles

def resultsEvaluation(clusteringProfile,viterbiGroups):
    """Compare a clustering with the class labels of its profile set.

    When viterbiGroups is not 0, every cluster is first split with
    ViterbiDecomposition.computeFixedNumberDecomposition(viterbiGroups) and
    clusteringProfile.cluster is overwritten (in place) with the ids of the
    resulting sub-clusters.  This path reads the module-level name
    num_clusters, which the __main__ block defines.

    Returns (rand, spe, sen, contigencyTable, error, pp, np) taken from
    GQLValidation.computeExternalIndices.

    Fix: the running sub-cluster offset used `no_clusters =+i`, which
    assigns +i instead of accumulating, so ids from different clusters
    overlapped from the third cluster on.  It is `+=` now.
    """
    if viterbiGroups != 0:
        cluster = clusteringProfile.cluster
        no_clusters = 0   # number of sub-cluster ids handed out so far
        for j in range(num_clusters):
            cluster_aux = GQLCluster.ProfileSubSet(clusteringProfile.profileSet,
                                                   clusteringProfile.clusters[j])
            vitDecomp = ViterbiDecomposition.ViterbiDecomposition(cluster_aux.getSequenceSet(),
                                                                  clusteringProfile.modelList()[j])
            grouping = vitDecomp.computeFixedNumberDecomposition(viterbiGroups)

            groups_seen = 0
            for group in grouping:
                for element in group:
                    cluster[cluster_aux.ids[element]] = no_clusters + groups_seen
                groups_seen += 1

            no_clusters += groups_seen

        clusteringProfile.cluster = cluster

    print(clusteringProfile.cluster)
    (contigencyTable, rand, spe,
     sen, cr, error, pp, np) = GQLValidation.computeExternalIndices(clusteringProfile.profileSet.seq_classes,
                                                                    clusteringProfile.cluster,
                                                                    clusteringProfile.profileSet.classes_no,
                                                                    max(clusteringProfile.cluster)+1)
    print(contigencyTable)
    return (rand, spe, sen, contigencyTable, error, pp, np)


def loadEvalData(data,profile,level,ont):
    """Load an external annotation and map it onto the genes of `profile`.

    data    -- 'GO', 'GOSlim' or 'GOAll' (Gene Ontology files under
               ../../GO/), 'PPI' (../../PPI/identifications.txt) or
               'RRR' (../../PPI/regulee-tor.txt)
    profile -- object whose genename attribute is passed to the mapping
               functions
    level, ont -- forwarded to mixtureFromGoMapping (GO variants only)

    Returns (mixtureAno, mask) as produced by mixtureFromGoMapping or
    mixtureFromPPI.

    Raises ValueError for any other `data` value; previously that case ended
    in an UnboundLocalError at the return statement.
    """
    if data in ('GO', 'GOSlim', 'GOAll'):
        go = GODag()
        go.loadTermsFromFile('../../GO/gene_ontology.obo')
        go.loadGenesFromFile('../../GO/gene_association.sgd')
        if data == 'GOSlim':
            go = go.getGOSlim('goslim_yeast')

        if data == 'GOAll':
            (mixtureAno, mask, labelnames) = mixtureFromGoMapping(go, profile.genename, level, ont,
                                                                  all=1, max=300, min=2)
        else:
            (mixtureAno, mask, labelnames) = mixtureFromGoMapping(go, profile.genename, level, ont)
    elif data == 'PPI':
        ppi = PPIGraph()
        ppi.loadPPIFromFile('../../PPI/identifications.txt')
        (mixtureAno, mask, labelnames) = mixtureFromPPI(ppi.baitTargets, profile.genename)
    elif data == 'RRR':
        ppi = PPIGraph()
        ppi.loadPPIFromTabFile('../../PPI/regulee-tor.txt')
        (mixtureAno, mask, labelnames) = mixtureFromPPI(ppi.baitTargets, profile.genename)
    else:
        raise ValueError("unknown evaluation data source: %r" % (data,))
    return mixtureAno, mask



if __name__ == '__main__':
    # Script driver: parse the options, load the data, run the replications,
    # write one result line per replication to a per-run file, save the best
    # models, and append one summary line to a shared result file.
    #
    # nr_states, num_clusters, duration and constraints must stay
    # module-level names: random_models_args, replications and
    # resultsEvaluation read some of them as globals.

    import sys

    # default settings
    fileName = ""
    nr_states = "[17]"
    num_clusters = 1          # -c; previously undefined unless -c was given
    viterbiGroups = 0
    repetitions = 30
    initializationMethod = 'R'
    estimationMethod = 'M'
    inputModel = ""
    labels = 0
    noOfClusters = 0          # set by -n: also report AIC/BIC/ICL
    sd = 0
    outputFile = ""
    constraintsFile = ""
    prior_type = 0
    prior_weight = 0.0
    prior_neg_weight = 0.0
    crossvalidate = 0.0
    negconstraintsFile = ''
    run_id = 0
    data = ''
    evaluate = 1
    labelFile = ''
    maxIter = 10
    dimension = 1

    argv = sys.argv
    print(argv)

    # -h is a plain flag and -E is accepted now.  -n deliberately keeps its
    # "takes a value" form: command lines such as "... -n data.txt" depend on
    # it together with fileName = argv[-1] below.
    try:
        opts, args = getopt.getopt(argv[1:], 'hc:e:E:i:I:l:L:m:M:o:r:S:v:n:s:p:P:t:T:V:d:D:')
    except getopt.error:
        print(usage_info)
        raise Usage(sys.exc_info()[1])

    # process options
    for o, a in opts:
        if o == '-h':
            print(usage_info)
            sys.exit()
        elif o == '-c':
            # Number of components
            num_clusters = int(a)
        elif o == '-e':
            estimationMethod = a
        elif o == '-E':
            # Perform evaluation (0 disables it)
            evaluate = int(a)
        elif o == '-r':
            # Number of repetitions
            repetitions = int(a)
        elif o == '-M':
            # Maximum number of iterations
            maxIter = int(a)
        elif o == '-m':
            # Model file as input
            inputModel = a
        elif o == '-o':
            # File name for outputs
            outputFile = a
        elif o == '-t':
            # positive constraints file
            print('t')
            constraintsFile = a
        elif o == '-T':
            print('-T')
            # negative constraints file
            negconstraintsFile = a
        elif o == '-n':
            # print statistics for finding the number of components
            noOfClusters = 1
        elif o == '-p':
            # prior weight (positive constraints)
            prior_weight = float(a)/1000
        elif o == '-P':
            # prior weight (negative constraints)
            prior_neg_weight = float(a)/1000
        elif o == '-s':
            # Number of states; int() only validates the value
            int(a)
            nr_states = "["+a+"]"
        elif o == '-S':
            # Seed
            sd = int(a)
        elif o == '-V':
            crossvalidate = float(a)/100
        elif o == '-I':
            run_id = float(a)
        elif o == '-i':
            initializationMethod = a
        elif o == '-l':
            # Percentage of labels; this option was parsed but never stored
            labels = float(a)
        elif o == '-d':
            data = a
        elif o == '-L':
            labelFile = a
        elif o == '-D':
            dimension = int(a)

    if len(argv) < 2:
        print(usage_info)
        raise Usage('No input file specified')
    # The input file is always the last word of the command line.
    fileName = argv[-1]
    if fileName == "":
        raise Usage('No input file specified')

    if initializationMethod == 'M':
        repetitions = 1
        if inputModel == "":
            raise Usage('option -i M requires the definition of a initial model')

    if initializationMethod == 'L3':
        repetitions = 1

    if initializationMethod in ('L1', 'L2'):
        # NOTE(review): the help text says number/1000 while the code has
        # always divided by 100 -- confirm the intended scale.
        labels = labels/100.0
        if labels == 0:
            raise Usage('option -i L requires the definition of the percentage of labels')

    if outputFile == "":
        outputFile = fileName

    neg_constraints = None
    constraints = None

    # prior_type: 1 = positive constraints only, 2 = negative only, 3 = both
    if negconstraintsFile != '' and constraintsFile != '':
        prior_type = 3
    elif constraintsFile != '':
        prior_type = 1
    elif negconstraintsFile != '':
        prior_type = 2
        if prior_neg_weight == 0.0:
            # only -p was given: use it as the negative prior
            prior_neg_weight = prior_weight
            prior_weight = 0

    if prior_type == 1 or prior_type == 3:
        [ids, constraints] = GQLValidation.readMixture(constraintsFile)
    if prior_type == 2 or prior_type == 3:
        [ids, neg_constraints] = GQLValidation.readMixture(negconstraintsFile)
    if prior_type == 3:
        sump = Numeric.sum(Numeric.sum(constraints))
        sumn = Numeric.sum(Numeric.sum(neg_constraints))
        if prior_neg_weight == 0.0:
            # split the single prior between both constraint kinds
            prior_neg_weight = (prior_weight*sump)/(2*(sumn+sump))
            prior_weight = (prior_weight*sumn)/(sumn+sump)

    print('priors %s %s %s %s %s' % (prior_type, prior_neg_weight, prior_weight,
                                     negconstraintsFile, constraintsFile))

    if sd != 0:
        seed(sd)

    profileSet = GQLCluster.ProfileSet()
    if "sqd" in fileName:
        sequenceSet = ghmm.SequenceSet(ghmm.Float(), fileName)
        profileSet.ReadDataFromDSequences(sequenceSet, fileName)
    else:
        profileSet.ReadDataFromCaged(fileName)

    if crossvalidate > 0.0:
        size = len(profileSet)
        testSet = sample(range(size), int(size*crossvalidate))
        testSet.sort()
        in_test = dict.fromkeys(testSet)
        trainSet = [i for i in range(size) if i not in in_test]
        print(testSet)

        # "is not None": comparing an array with != None is elementwise
        if constraints is not None:
            orig_constraints = copy.deepcopy(constraints)
            constraints = Numeric.take(constraints, trainSet, axis=0)
            constraints = Numeric.take(constraints, trainSet, axis=1)
            test_constraints = Numeric.take(orig_constraints, testSet, axis=0)
            test_constraints = Numeric.take(test_constraints, testSet, axis=1)
        if neg_constraints is not None:
            orig_neg_constraints = copy.deepcopy(neg_constraints)
            neg_constraints = Numeric.take(neg_constraints, trainSet, axis=0)
            neg_constraints = Numeric.take(neg_constraints, trainSet, axis=1)
            test_neg_constraints = Numeric.take(orig_neg_constraints, testSet, axis=0)
            test_neg_constraints = Numeric.take(test_neg_constraints, testSet, axis=1)

        profileOrig = profileSet
        profileSet = profileOrig.getSubset(trainSet)
        profileTest = profileOrig.getSubset(testSet)

        test_file = open(fileName+'-'+str(run_id)+'test.txt', 'w')
        try:
            for i in testSet:
                test_file.write(str(i)+'\n')
        finally:
            test_file.close()

        if constraints is not None:
            print(len(constraints))
            print(len(test_constraints))

    duration = len(profileSet[0])
    model_args = random_models_args()
    model_args.nr_states = nr_states
    model_args.dimension = dimension

    profiles = replications(profileSet, model_args, repetitions, initializationMethod, estimationMethod,
                            inputModel, labels, labelFile, constraints, neg_constraints,
                            prior_weight, prior_neg_weight, prior_type, maxIter)

    # evaluation of results

    rands = []
    sens = []
    spes = []
    pps = []
    nps = []
    errors = []
    aics = []
    bics = []
    ibics = []
    lls = []
    diags = []
    diagsn = []

    # printing the results of all replications
    outfile = open("res-"+outputFile+"-%i-%s-%i-%f-%f-%i.txt" % (num_clusters, nr_states, prior_type,
                                                                 prior_weight, prior_neg_weight, run_id), 'w')
    try:
        for prof in profiles:

            lls.append(prof.ml)
            if evaluate:
                (rand, spe, sen, contigencyTable, error, pp_val, np_val) = resultsEvaluation(prof, viterbiGroups)
            else:
                # keep every statistics list the same length and give the
                # result line below defined values
                rand = 0
                spe = 0
                sen = 0
                error = 0
                pp_val = 0
                np_val = 0
            rands.append(rand)
            sens.append(sen)
            spes.append(spe)
            errors.append(error)
            pps.append(pp_val)
            nps.append(np_val)

            if noOfClusters:
                aic = GQLValidation.computeAIC(prof.ml, prof.modelList())
                bic = GQLValidation.computeBIC(prof.ml, len(profileSet), prof.modelList())
                ibic = GQLValidation.computeICL(prof.ml, len(profileSet), prof.modelList(), prof.p)
                outfile.write("\t%f\t%f\t%f" % (aic, bic, ibic))
                aics.append(aic)
                bics.append(bic)
                ibics.append(ibic)

            # diag / diagn: trace of the column-normalised cluster-to-cluster
            # constraint matrix for positive / negative constraints
            diag = 0.0
            diagn = 0.0
            if prior_type == 2 or prior_type == 3:
                resd = constraintDistribution(prof.cluster, neg_constraints, num_clusters)[1]
                diagn = sum(Numeric.diagonal(resd))
            if prior_type == 1 or prior_type == 3:
                resd = constraintDistribution(prof.cluster, constraints, num_clusters)[1]
                diag = sum(Numeric.diagonal(resd))

            diags.append(diag)
            diagsn.append(diagn)

            outfile.write("%i\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n" % (repetitions, rand, spe, sen, error,
                                                                        pp_val, np_val, prof.ml, diag, diagn))

        # keep the models of the replication with the highest likelihood
        bestProfile = profiles[Numeric.argmax(lls)]
        outFile = outputFile+'-'+str(prior_type)+'-'+str(prior_weight)+'-'+str(prior_neg_weight)+'-'+str(run_id)
        bestProfile.writeModels(outFile+'.xml', 'HMM')
        bestProfile.writeSeqMixDistributions(outFile+'.spd')
    finally:
        outfile.close()

    if data != '':
        (mixtureAno, mask) = loadEvalData(data, profileSet, 2, [0, 1, 2])
        expt = bestProfile.p
        (g, n, labPart, tp, fp, fn, tn, mtp, mfp, mfn, mtn) = clusterEvaluationMixture(expt, mixtureAno, mask,
                                                                                       entropy=1)
        cr = GQLValidation.correctedRand(tp, fn, fp, tn, maxi=GQLValidation.rand(mtp, mfn, mfp, mtn))
        crt = 0.0
        if crossvalidate > 0:
            # setting the initial ws
            (ml, alpha, w) = GQLMixture.estimate_mixture(bestProfile.modelList(), profileTest.ghmm_seqs,
                                                         0, 0.01, bestProfile.fixed_models,
                                                         alpha=bestProfile.alpha)
            # Without constraints the unconstrained estimate is what gets
            # evaluated (previously `p` was a leftover loop variable here).
            p = w
            if prior_type == 1:
                (ml, alpha, p) = GQLMixture.estimate_mixture(bestProfile.modelList(),
                                                             profileTest.ghmm_seqs, 0, 0.01,
                                                             bestProfile.fixed_models,
                                                             previous_weights=w, alpha=alpha,
                                                             constraints=test_constraints,
                                                             prior_weight=prior_weight,
                                                             prior_type=prior_type)
            if prior_type == 2:
                (ml, alpha, p) = GQLMixture.estimate_mixture(bestProfile.modelList(),
                                                             profileTest.ghmm_seqs, 0, 0.01,
                                                             bestProfile.fixed_models,
                                                             previous_weights=w, alpha=alpha,
                                                             neg_constraints=test_neg_constraints,
                                                             prior_neg_weight=prior_neg_weight,
                                                             prior_type=prior_type)
            if prior_type == 3:
                (ml, alpha, p) = GQLMixture.estimate_mixture(bestProfile.modelList(),
                                                             profileTest.ghmm_seqs, 0, 0.01,
                                                             bestProfile.fixed_models,
                                                             previous_weights=w, alpha=alpha,
                                                             constraints=test_constraints,
                                                             prior_weight=prior_weight,
                                                             neg_constraints=test_neg_constraints,
                                                             prior_neg_weight=prior_neg_weight,
                                                             prior_type=prior_type)

            (mixtureAno, mask) = loadEvalData(data, profileTest, 2, [0, 1, 2])
            (g, n, labPart, tp, fp, fn, tn, mtp, mfp, mfn, mtn) = clusterEvaluationMixture(p, mixtureAno, mask,
                                                                                           entropy=1)
            crt = GQLValidation.correctedRand(tp, fn, fp, tn, maxi=GQLValidation.rand(mtp, mfn, mfp, mtn))

    if repetitions == 1:
        rands.append(rands[0])
        sens.append(sens[0])
        spes.append(spes[0])

    # one summary line per run, appended to a file shared between runs
    outfile = open("res-"+outputFile+"-%i-%s-%i.txt" % (num_clusters, nr_states, prior_type), 'a')
    try:
        if initializationMethod in ('L1', 'L2', 'L'):
            if prior_type != 0:
                outfile.write("%f\t%f\t" % (prior_weight, prior_neg_weight))

            if evaluate:
                outfile.write("%f\t%f\t%f\t%f\t%f\t%f\t" % (labels, MLab.mean(rands),
                                                            MLab.mean(spes),
                                                            MLab.mean(sens),
                                                            MLab.mean(pps),
                                                            MLab.mean(nps)))
            if data != '':
                outfile.write("%f\t%f\t" % (cr, crt))

            if noOfClusters:
                outfile.write("%f\t%f\t%f\t%f\t%f\t%f\t%f\t" % (labels, MLab.mean(aics),
                                                                MLab.mean(aics), MLab.mean(bics),
                                                                MLab.mean(bics), MLab.mean(ibics),
                                                                MLab.mean(ibics)))

            outfile.write(" ".join(argv)+"\n")
        else:
            if prior_type != 0:
                outfile.write("%f\t%f\t" % (prior_weight, prior_neg_weight))
            if evaluate:
                outfile.write("%f\t%f\t%f\t%f\t%f\t%f\t" % (num_clusters, MLab.mean(rands),
                                                            MLab.mean(spes),
                                                            MLab.mean(sens),
                                                            MLab.mean(pps),
                                                            MLab.mean(nps)))
            if data != '':
                outfile.write("%f\t%f\t" % (cr, crt))

            if noOfClusters:
                outfile.write("%f\t%f\t%f\t%f\t%f\t%f\t" % (MLab.mean(aics),
                                                            MLab.mean(aics), MLab.mean(bics),
                                                            MLab.mean(bics), MLab.mean(ibics),
                                                            MLab.mean(ibics)))

            outfile.write("\t".join(argv)+"\n")
    finally:
        outfile.close()











