BioInt  1.02.00
BioInt: An integrative biological object-oriented application framework and interpreter
BioChain.h
Go to the documentation of this file.
00001 /****************************************************************************
00002 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
00003 The BioBhasha : The Biologist's Programming Language 
00004 Version 1.0 (19th December 2001)
00005 Prasad, B.V.L.S.
00006 Contact: burrashiva@yahoo.com
00007 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
00008 *****************************************************************************/
00009 #ifndef BIOCHAIN_H
00010 #define BIOCHAIN_H
00011 
00012 #include "BioStatistics.h"
00013 #include "BioResidue.h"
00014 
00015 /****************************************************************************
00016  *
00017  * COMMENTARY on Each Class/Concept: Hows and Whys :
00018  *
00019  * BioChain:
00020  *      The explanation given for BioResidue is valid for BioChain as well. A
00021  *      group of Residues makes up a Chain, but the number of Chains is a
00022  *      variable which depends purely on the Protein/DNA. A functional
00023  *      protein may have 2 chains, as in HIV proteinase. In the PDB it is submitted
00024  *      as 2 Chains, A and B, plus I for the inhibitor. Hence, a Chain described
00025  *      in a PDB file could be a Functional Protein by itself or a subset of
00026  *      a functional molecule. Hence BioChain can be made a separate ADT
00027  *      describing a part of the molecule. This type of Abstraction of a Protein
00028  *      into small Chains makes the BioObject Communication much clearer, easier
00029  *      and more efficient from the user's point of view.
00030  *      As was explained for the relation between BioResidue and BioAtom, here too
00031  *      BioChain "Has-A" BioResidues. A BioChain "AGGREGATES" BioResidues.
00032  *      vector<BioResidue>residue; This takes care of the size of the Chain or
00033  *      the Protein, without any programmer involvement in DMA code. This is the greatest
00034  *      advantage which the STL gives a Programmer.
00035  *      This type of Abstraction lets the user access any Residue, down to any atom,
00036  *      at any point of the Programming. Many Software programs do not give such
00037  *      an option. The aim of BioBhasha has been to give tools and facilities to
00038  *      the user/Programmer at the Atomic Level (literally and Programming-wise).
00039  *      The Mining of Data can be achieved very clearly and efficiently.
00040  ********************************************************************************/      
00041 class BioChain
00042 {
00043 
00044 protected:
00045 
00046         vector<BioResidue>residue; // the BioResidues aggregated by this chain
00047         char chainId_; // single-character chain identifier
00048 
00049 public:
00050 // Default constructor. Not recommended; the notes in the BioResidue
00051 // commentary apply here as well.
00052         BioChain();
00053         virtual ~BioChain();
00054 
00055 // This constructor expects a PDB file name. It takes only the first Chain
00056 // encountered and stops populating once it sees OXT as the AtomName. It can be used
00057 // when the user is confident that the first Chain provided in the PDB file is the one
00058 // desired.
00059         BioChain(const string& );
00060 
00061 // This constructor is expected to be the most widely used, as it gives the facility of
00062 // accessing a Chain by a ChainID supplied by the user.
00063 // Hence it expects not only the PDB file name, as the one above, but also the ChainId
00064 // the user is interested in.
00065         BioChain(const string& ,char );
00066 
00067 // This constructor is used to instantiate a Chain while reading a PDB file.
00068         BioChain(const unsigned long& atnumber,const string& atname,const string& resname,const char& ch,
00069                  const long& resnumber, const float& x1, const float&  y1,const float& z1,
00070                  const float&  oc1, const float&  bf1,const string& atrec);
00071 
00072 // Another constructor, for convenience when external data is already held as a vector of
00073 // BioResidue and is to be made into a Chain in order to use the Chain operations.
00074         BioChain(const char& chid, vector<BioResidue> );
00075 
00076 // This method sets a NEW ChainID on the Chain. This facility exists to
00077 // make Chain names more meaningful. For an enzyme complex, 'E' is more
00078 // meaningful than 'A' or anything else. Similarly, 'I' for inhibitor is
00079 // more meaningful than 'U', etc.
00080 // A word of caution: any "set" or "push" method should be used with restraint.
00081         void setChainId(const char& ch ) ;
00082 
00083 
00084 // These give out the residue number, the residue name and the residue itself.
00085 // As noted in BioResidue, these methods are meant to be used in logical expressions.
00086 // For example: if ( getLastResidueNumber() == presentResidueNumber ) ...
00087 // populate the BioAtom vector present in that Residue. (NOTE(review): this example previously named popLastResidueNumber(), which is not declared in this class -- confirm the intended call.)
00088         long getLastResidueNumber() const ;
00089         string getLastResidueName() const ;
00090         char getChainId() const ;
00091         bool findChainId(const char&);
00092         BioResidue& getLastResidue()  ;
00093         vector<BioResidue> getResidues();
00094 
00095 // These give the number of residues and the number of atoms in a particular Chain.
00096 unsigned int getNumberOfResidues() const ;
00097 unsigned        int getNumberOfAtoms() const;
00098         int getResidueIndex(const long& ) const; // NOTE(review): presumably maps a PDB residue number to its index in this chain -- confirm
00099 
00100 // This is a very important method. It gives the facility to access any
00101 // residue in a chain, but ONLY BY RESIDUE INDEX -- not by the
00102 // RESIDUE NAME and *NOT* by the RESIDUE NUMBER either, which would make the
00103 // retrieval of the data ambiguous.
00104 // Wherever a part of the structure coordinates is obtained, the arguments
00105 // refer to the RESIDUE INDEX and *NOT* to the RESIDUE NUMBER.
00106 // This is a caution to all users: please be careful.
00107 // Please use the getResidueIndex or getAtomIndex member functions to obtain (NOTE(review): getAtomIndex is not declared in this class)
00108 // the index values of the respective residues or atoms, and give those
00109 // values AS ARGUMENTS TO FUNCTIONS OF THIS TYPE.
00110 // THIS IS A FACILITY TO REDUCE ERRORS, AS SOME PDB FILES OMIT SOME RESIDUE
00111 // NUMBERS IF NO ELECTRON DENSITY IS FOUND FOR SUCH RESIDUES OR ATOMS.
00112 // THIS LITTLE INCONVENIENCE ENORMOUSLY REDUCES THE ERROR-PRONE CHARACTER OF THE
00113 // PROGRAM, SO PLEASE ACCEPT THE INCONVENIENCE.
00114         BioResidue getResidue(const int& );
00115 
00116 // This set of methods is most important for analysing only a particular part of a
00117 // structure file, or a part of the protein or chain under consideration. It is
00118 // also expected to be used very often. The content is returned, again, as a BioChain,
00119 // making it possible to use all the important member functions for manipulation.
00120 // Giving access to every bit of information has been the main aim, because
00121 // "The Quality of Research Differs Only in the Resolution at Which One Wants to Work."
00122 // ARGUMENTS ARE INDICES, NOT THE RESIDUE NUMBERS PROVIDED IN THE PDB. USE THE
00123 // GETRESIDUEINDEX() FUNCTION.
00124         BioChain getChainSegment(const long& first, const long& last);
00125 
00126 // This method gives the ability to get the coordinates of a chain segment for only a
00127 // particular atom of interest. Please see the explanation at showChainSegment.
00128 // ARGUMENTS ARE INDICES, NOT THE RESIDUE NUMBERS PROVIDED IN THE PDB. USE THE
00129 // GETRESIDUEINDEX() FUNCTION.
00130         BioChain getChainSegment(const long& first,const long& last, const string& atname);
00131 
00132 // These methods are primarily provided to populate the BioResidue vector container with
00133 // residue information from outside the class.
00134         void pushResidue(const BioResidue&);
00135         void pushResidue(const unsigned long& atnumber,const string& atname, const string& resname,
00136                     const long& resnum, const float& x1, const float& y1, const float& z1,
00137                       const float& oc1, const float& bf1,const string& );
00138 
00139 // Similar to the explanation in BioResidue, this removes the hydrogen atoms from
00140 // the data stored in the vector<BioResidue> and returns the result as a new BioChain.
00141         BioChain eraseHydrogen();
00142 
00143 // This method shows all the contents of the BioChain in PDB format. The output can be
00144 // written to a file, as was mentioned earlier.
00145         void showChain(ostream& = cout); 
00146         friend ostream& operator<<(ostream& os, BioChain& ip);
00147 
00148 // These methods should also find great utility, as they give the chance to check whether
00149 // the proper coordinates are being taken for the analysis or not.
00150 // The 2nd method greps only the required atoms from the Chain list.
00151 // For example: to get only the "CA" atoms from, say, residues 120-150 in a chain, one
00152 // can simply write showChainSegment(120,150,"CA");
00153 // ARGUMENTS ARE INDICES, NOT THE RESIDUE NUMBERS PROVIDED IN THE PDB. USE THE
00154 // GETRESIDUEINDEX() FUNCTION.
00155         void showChainSegment(const long& ,const long&, ostream& = cout);
00156         void showChainSegment(const long& , const long& , const string& atname,ostream& = cout);
00157 
00158 // This method, as the name suggests, gives out the centroid of the coordinates stored in
00159 // the chain. NOTE(review): this note originally said the centroid is returned as a BioAtom; the declared return type is BioPoint.
00160 // This should have wide application in refinement protocols, finding the least-squares
00161 // plane or line, and similar tasks.
00162         BioPoint getCentroid();
00163         BioPoint getCentroid(const string& );
00164 
00165         vector<float> getNormalizedBfactors();
00166         void setNormalizedBfactors();
00167         float getAverageBfactor();
00168         
00169 // NOTE(review): the getDistance parameters are unnamed; presumably a residue index, then an atom name or atom index, then either x, y, z or a BioAtom -- confirm against the implementation.
00170         float getDistance( const int& , const string& ,const float& ,const float& , const float&);
00171         float getDistance( const int& , const int& , const float& , const float& , const float& );
00172 
00173         float getDistance( const int& , const string& , const BioAtom&);
00174         float getDistance( const int& , const int& , const BioAtom& );
00175 // NOTE(review): the meaning of the two trailing float defaults (2.3 and 3.2) is not documented in this header -- confirm against the implementation.
00176         void showSphereAnAtom(const unsigned int& , const string&, ostream& = cout, const float& = 2.3, const float& = 3.2);
00177         void showSphereAnAtom(const unsigned int& , const unsigned int& , ostream& = cout, const float& = 2.3, const float& = 3.2);
00178 
00179 };
00180 
00181 #endif
00182 
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines