#!/usr/bin/env python2.3
################################################################################
#
#       This file is part of the GQL (Graphical Query Language) Toolkit
#
#       file:   GQLMotif.py
#       author: Alexander Schliep (alexander@schliep.org)
#
#       Copyright (C) 2003-2004 Alexander Schliep
#
#       Contact: alexander@schliep.org
#
#       Information: http://ghmm.org/gql
#
#	GQL is free software; you can redistribute it and/or modify
#	it under the terms of the GNU General Public License as published by
#	the Free Software Foundation; either version 2 of the License, or
#	(at your option) any later version.
#
#	GQL is distributed in the hope that it will be useful,
#	but WITHOUT ANY WARRANTY; without even the implied warranty of
#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#	GNU General Public License for more details.
#
#	You should have received a copy of the GNU General Public License
#	along with GQL; if not, write to the Free Software
#	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
#
#       This file is version $Revision: 477 $
#                       from $Date: 2004-11-15 09:00:56 -0300 (Mon, 15 Nov 2004) $
#             last change by $Author: filho $.
#
################################################################################
'''
Brief explanations of the class and its roles (to be extended).

SequenceSet - This is a general class for storing a list of sequence data
from a given set of genes, implemented as the ProfileSet class. It should
behave like an array, so basic array methods should be there (__len__,
__getitem__, ...).

UpstreamSet - Specialization of the above class. This is the object that
we actually work with in the motif analysis. Methods for loading data files
or getting the sequences from web pages should be implemented here.

TFBSModel - General class for defining a model of a given TFBS. Classes
implementing the models, like PWMs or HMMs, should extend this one, and no
specific implementation should be carried here. The searchHits method
should behave like an abstract method, and should be overridden in all
specializations of this class.

PWMModel - Example of a specialization of the above class.

TFBSHit - General class for defining the hit data of a given upstream
region given a model. Again, for specific representations of this data,
other classes like TFBSDataHitsCount, TFBSDataTransfac, should be
implemented.

TFBSHitsCount - Example of a specialization of TFBSHit, where
only the number of hits found in the sequence is taken into consideration.

TFBSSet - Class for storing the hits information of a set of genes. It
should be implemented as a mapping for the sake of efficiency, and basic
mapping methods should also be implemented here (__len__, __getitem__, and
so on). Methods for loading hit information files should be implemented
here.

MotifAnalysis - Main class that takes care of all high level methods
in the motif analysis. The main methods for motif analysis (or
wrapping of existing tools) should be implemented here. It has a
TFBSSet, a SequenceSet and a list of TFBSModels. Methods for loading a
set of TFBSModels, for doing the motif analysis, or building
wrappers around motif analysis methods should be implemented here.
'''

#----- Globals ----------------------------------------------------------------
# Root URL of the yeastgenome.org web site.
gYEASTURL = "http://www.yeastgenome.org/"


#------------------------------------------------------------------------------

class SequenceSet:
    """General container for the sequence data of a set of genes.

    Behaves like a sequence over ``self.sequences``: it supports ``len()``
    and item access.  The class is also meant to carry the methods for
    loading the data from data files or from url queries.
    """

    def __init__(self):
        # source of the data (ie. url or data file)
        self.source = ""
        # ids of the genes, in the same order as the stored sequences
        self.accs = []
        # sequences of the genes
        self.sequences = []

    def __len__(self):
        """Return the number of stored sequences."""
        stored = self.sequences
        return len(stored)

    def __getitem__(self, i):
        """Return the entry of ``self.sequences`` selected by ``i``."""
        stored = self.sequences
        return stored[i]

    def subSet(self, listOfGenes):
        """Placeholder without implementation; always returns None."""
        return None
    

class UpstreamSet(SequenceSet):
    """SequenceSet specialization for the upstream sequences of genes.

    It is meant to hold the methods that load the data from data files
    or obtain it through url queries; both loaders below are still
    placeholders without implementation.
    """

    def loadFromFile(self, file):
        """Load the data from the given file (not implemented yet)."""
        return None

    def loadFromYeast(self, geneList):
        """Load the data from the Yeast database (not implemented yet)."""
        return None
   

class TFBSModel:
    """Generic base class for modelling a given TFBS.

    Every concrete TFBS model should extend this class and override
    searchHits.
    """

    def __init__(self, id):
        # original sequences that generated the model (optional)
        self.sequences = []
        # id of the motif
        self.id = id

    def searchHits(self, sequence, type, **tp):
        """Search a sequence for hits of this model.

        Intended contract: return a TFBSData object if any hit is
        encountered, given the sequence and a set of model specific
        parameters (``tp``); ``type`` selects the kind of TFBSData to be
        returned.

        This method should be viewed as abstract and be reimplemented by
        each specific model.  The base version does nothing and returns
        None.
        """
        return None
    

class PWMModel(TFBSModel):
    """TFBS model implemented as a PWM (position matrix)."""

    def __init__(self, id=None):
        """Create an empty PWM model.

        id -- id of the motif; optional so that the former no-argument
              construction ``PWMModel()`` keeps working.
        """
        # Initialize the attributes every TFBSModel carries (id, sequences);
        # without this call a PWMModel instance had neither of them.
        # Explicit base-class call: the classes here are old-style.
        TFBSModel.__init__(self, id)
        self.pwm = []  # position matrix
        self.background = ""  # background signal

    def searchHits(self, sequence, type=None, **qw):
        """Return a TFBSData object if any hit is encountered.

        Not implemented yet: currently always returns None.

        sequence -- sequence to be searched
        type     -- kind of TFBSData to return; accepted in the same
                    position as in TFBSModel.searchHits so that both can
                    be called the same way
        qw       -- model specific search parameters
        """
        pass

   
class TFBSHit:
    """The TFBS hit data of a given gene.

    For a specific representation of this data an extended class should
    be defined, ie. TFBSBinary, TFBSCount, TFBSTransfac.
    """

    def __init__(self):
        """Create an empty hit record with default values."""
        self.acc = ""  # gene acc
        self.pos = []  # positions of the hits
        self.direction = []  # direction of the hits (1 or -1)
        self.pvalues = 1.0  # p-value of the matching
        # tfbs model related to the hit; None until one is assigned
        # (the former ``Null`` is not a Python name and raised NameError)
        self.tfbsmodel = None
        self.parameters = []  # parameters used to obtain such matrix
                            
        
class TFBSHitsCount(TFBSHit):
    """Hit representation that only considers the number of hits."""

    def data(self):
        """Return how many hit positions are stored in ``self.pos``."""
        positions = self.pos
        return len(positions)
          
   
class TFBSSet:
    """Class containing the set of TFBS of a given set of genes.

    It can also contain methods for reading the information from files
    (ie. Tavazoie, Transfac).  It is implemented as a mapping for the
    sake of computational efficiency: ``len()`` and item access are
    forwarded to the underlying dictionary.
    """

    def __init__(self):
        """Create an empty set that is not yet bound to a sequence set."""
        self.TFBSSet = {}  # mapping acc (gene id) to an array of TFBSData
        # related sequence set; None until one is assigned
        # (the former ``Null`` is not a Python name and raised NameError)
        self.sequenceSet = None

    def __getitem__(self, i):
        """Return the entry stored under key ``i``.

        Raises KeyError if ``i`` is not a key of the mapping.
        """
        return self.TFBSSet[i]

    def __len__(self):
        """Return the number of keys in the mapping."""
        return len(self.TFBSSet)

    def loadFromTransfac(self):
        """Placeholder without implementation; returns None."""
        pass

    def loadFromFile(self):
        """Placeholder without implementation; returns None."""
        pass

    def merge(self, tfbsSet):
        """Placeholder without implementation; returns None."""
        pass

    def subSet(self, genesList):
        """Placeholder without implementation; returns None."""
        pass
   

class MotifAnalysis:
    ''' Class for implementing Motif analysis methods, reading data
    from TFBSModels, calculate the TFBSData given the models and
    the sequence data, and so on. The main methods for motif analysis
    (or wrapping of existing tools) should be implemented here. Also methods
    for saving output files like the incidence matrix, models and so on...

    All methods below are still placeholders without implementation.
    '''

    def __init__(self):
        ''' Create an analysis with no data attached yet. '''
        # Both references stay None until they are assigned (the former
        # ``Null`` is not a Python name and raised NameError here).
        self.TFBSSet = None  # hits of the analysed genes
        self.sequenceSet = None  # sequences of the analysed genes
        self.TFBSModels = []  # all models of TFBS available

    def loadTFBSModelsFromFile(self, file):
        ''' Placeholder without implementation; returns None. '''
        pass

    def loadTFBSModelsFromTransfac(self, file):
        ''' Placeholder without implementation; returns None. '''
        pass

    def analyseSequencesWithPWMs(self):
        ''' Placeholder without implementation; returns None. '''
        pass

    def analyseSequencesWithAlignACE(self):
        ''' Intended as a wrapper around AlignACE, loading the results
        given by the motif analysis. Not implemented yet; returns None.
        '''
        pass

    def saveIncidenceMatrices(self):
        ''' Intended to return an incidence matrix from the tfbs set.
        Not implemented yet; returns None.
        '''
        pass
                          
