BioInt
1.02.00
BioInt: An integrative biological object-oriented application framework and interpreter
|
00001 #ifndef BIOMULTIPLESEQUENCEALIGNMENT_H 00002 #define BIOMULTIPLESEQUENCEALIGNMENT_H 00003 00004 #include "BioMultipleFasta.h" 00005 #include "BioOutputStream.h" 00006 #include "BioMultipleGenBank.h" 00007 #include "BioMultipleEmbl.h" 00008 #include "BioMultipleSwissProt.h" 00009 #include "BioMatrix.h" 00010 #include "BioDnaSubstitutionMatrix.h" 00011 #include "BioProteinSubstitutionMatrix.h" 00012 00013 #include <unistd.h> 00014 #include <algorithm> 00015 #include <vector> 00016 #include <stdio.h> 00017 #include<ctype.h> 00018 #include <string.h> 00019 #include <stdlib.h> 00020 #include <getopt.h> 00021 #define SEEK_START 0 00022 #define SEEK_END 2 00023 #define INFTY 0x8000000 00024 00025 00026 struct sequence_info{ 00027 int** s; 00028 int* sl; 00029 int** sn; 00030 int* lsn; 00031 int** gis; 00032 int** relpos; 00033 int** sip; 00034 int* nsip; 00035 }; 00036 00037 struct dp_matrix{ 00038 int** tb; 00039 int** m; 00040 int* a; 00041 int* ga; 00042 int* gb; 00043 int* true_x; 00044 int* true_y; 00045 void* tb_mem; 00046 void* m_mem; 00047 int x; 00048 int y; 00049 }; 00050 00051 class BioMultipleSequenceAlignment 00052 { 00053 vector<BioFasta> aln_seqs; 00054 vector<BioFasta> inp_seqs; 00055 vector<BioFasta> do_complete_alignment(BioMultipleFasta&); 00056 void* tmalloc(int size); 00057 struct dp_matrix* dp_matrix_alloc(struct dp_matrix *dp,int x,int y,int p); 00058 struct dp_matrix* dp_matrix_realloc(struct dp_matrix *dp,int x,int y,int p); 00059 struct dp_matrix* dp_matrix_init(struct dp_matrix *dp,int x,int y); 00060 void dp_matrix_free(struct dp_matrix *dp,int p); 00061 static short int gon250mt[276]; 00062 //struct sequence_info *si2; 00063 struct sequence_info *si; 00064 int numseq;// = 0; 00065 int numprofiles;// = 0; 00066 00067 int** ten_wu_manber(int* seq,int len,int p[]); 00068 00069 struct sequence_info* read_sequences(struct sequence_info* si,char* infile); 00070 double distance_calculation(int** matches[],int len_a,int len_b,int a,int b); 00071 double distance_calculation2(int** matches[],int len_a,int len_b,int a,int b); 00072 00073 void fill_hash(struct sequence_info* si,int** matches[],int nowu); 00074 int* upgma(double **dm,int* tree); 00075 void add_ptm(int** matches[],int** matrix,int a,int b); 00076 struct dp_matrix* consistency_check(struct dp_matrix *dp,int len_a,int len_b,int dia); 00077 int* make_profile(int* prof,int* seq,int len,int** subm); 00078 int** read_matrix(short *matrix_pointer,int** subm,int gpo); 00079 int* main_fast_dyn(int* path,struct dp_matrix *dp,int* prof1,int* prof2,int len_a,int len_b); 00080 00081 int** make_new_profile(int**newp,int** profa,int** profb,int* path); 00082 00083 vector<BioFasta> print_alignment(struct sequence_info* si);//,char* outfile); 00084 struct sequence_info* update(struct sequence_info* si,int** profile,int a,int b,int newnode,int* path); 00085 00086 void set_gap_penalties(int* prof,int len,int nsip); 00087 void update_gaps(int old_len,int*gis,int new_len,int *newgaps); 00088 void update_hash(struct sequence_info* si,int** matches[],int a,int b,int enew); 00089 int gpo; //= 0; 00090 void add_ptm2(int** matches[],struct dp_matrix *dp,int a,int b); 00091 struct dp_matrix* consistency_check2(struct dp_matrix *dp,int len_a,int len_b,int dia); 00092 // char* get_input_into_string(char* string_,string); 00093 00094 public: 00095 //static short int gon250mt; 00096 BioMultipleSequenceAlignment(); 00097 00098 BioMultipleSequenceAlignment(BioMultipleFasta&); 00099 BioMultipleSequenceAlignment(BioMultipleGenBank&); 00100 BioMultipleSequenceAlignment(BioMultipleEmbl&); 00101 BioMultipleSequenceAlignment(BioMultipleSwissProt&); 00102 void showAlignment(ostream& = cout); 00103 string getAlignedSequence(int i); 00104 vector<string> getAlignedSequences(); 00105 string getAlignedSequenceName(int i); 00106 BioFasta getAlignedSequenceInFasta(int i); 00107 BioMultipleFasta getAlignedSequencesInMultipleFasta(); 00108 vector<char> getColumn(int i); 00109 BioMatrix getProfileMatrix(string s); 00110 int getColumnScore(int col_index, string s); 00111 vector<int> getColumnScores(string type); 00112 }; 00113 00114 #endif