src/common/prospect_variables.cc

00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <ctype.h>
00006 #include <assert.h>
00007 
00008 
00009 #include <libxml/tree.h>
00010 #include <libxml/parser.h>
00011 #include <libxml/xpath.h>
00012 #include <libxml/xpathInternals.h>
00013 
00014 #include "open_prospect.h"
00015 #include "sm_matrix.h"
00016 
00017 
00018 
00019 void get_cores(TemplateStruct *template_data);
00020 void get_interactions(TemplateStruct *template_data);
00021 
00022 
00023 //---template data
00024 
00025 
00026 
00027 xmlNodePtr get_first_child(xmlNodePtr parent, char *str) {
00028   xmlNodePtr curnode;
00029   
00030   curnode = parent->children;
00031   while (curnode != NULL) {
00032     if (!strcmp((char *)curnode->name, str)) {
00033       return curnode;
00034     }
00035     curnode = curnode->next;
00036   }
00037   return NULL;
00038 }
00039 
00040 
00041 char *get_first_child_content( xmlNodePtr parent, char *str) {
00042         xmlNodePtr node = get_first_child(parent, str);
00043         if ( node == NULL )
00044                 return NULL;
00045         return (char *)xmlNodeGetContent( node );
00046 }
00047 
00048 
00049 
00050 TemplateStruct::TemplateStruct() : TargetStruct() {
00051         this->len = 0;
00052         this->cb_dist = NULL;
00053         this->template_name = NULL;
00054         this->residue = NULL;
00055         this->cores = NULL;
00056         this->core_count = 0;
00057         this->t_residue_info = NULL;
00058         this->twobody_interactions = NULL;
00059 }
00060 
00061 
00062 TemplateStruct::~TemplateStruct() {
00063         FreeMem();
00064 }
00065 
00066 
00067 void TemplateStruct::SetLen(int new_len) {
00068         if ( new_len != len ) {
00069                 TargetStruct::SetLen( new_len );        
00070                 t_residue_info = new TemplateResidue[ new_len ];        
00071         }
00072 }
00073 
00074 
00075 void TemplateStruct::FreeMem(void) {
00076         TargetStruct::FreeMem();
00077         if (this->template_name != NULL)
00078                 free(this->template_name);
00079         if (this->cb_dist) {
00080                 free(this->cb_dist[0]);
00081                 free(this->cb_dist);
00082         }
00083         this->len = 0;
00084         this->cb_dist = NULL;
00085         this->template_name = NULL;
00086         this->residue = NULL;
00087 }
00088 
00089 void TemplateStruct::CopyScoreTable( pValType **scores ) {
00090         for (int i = 0; i < len; i++ ) {
00091                 int rs = AA1toAANum( residue[i].RS );
00092                 for ( int j = 0; j < 20; j++ ) {
00093                         t_residue_info[i].Score[j] = scores[ rs ][ j ];
00094                 }               
00095         }       
00096 }
00097 
00098 void TemplateStruct::CopyScoreTable( pValType scores[21][21] ) {
00099         for (int i = 0; i < len; i++ ) {
00100                 int rs = AA1toAANum( residue[i].RS );
00101                 for ( int j = 0; j < 20; j++ ) {
00102                         t_residue_info[i].Score[j] = scores[ rs ][ j ];
00103                 }               
00104         }       
00105 }
00106 
00107 
00108 
00109 int TemplateStruct::SetSequence(char *seq) {
00110         TargetStruct::SetSequence( seq );
00111         t_residue_info = (TemplateResidue *)malloc(sizeof(TemplateResidue) * len );
00112 }
00113 
00114 int TemplateStruct::LoadFile( char *template_path ) {
00115   xmlDocPtr doc;
00116   xmlNodePtr root, fssp_node, profile_node, pdbcode_node, pdbchain_node, resSeq, iCode, 
00117     template_loc, template_start, template_stop;
00118   int i, j;
00119   float N[3], Ca[3], C[3], O[3], Cb[3], Cm[3];
00120   char *read_buffer, *cur_line;
00121   pValType tmp_val;
00122 
00123  
00124 //  fprintf(stderr, "Loading %s\n", template_path);
00125   doc = xmlParseFile(template_path);
00126   if (doc == NULL)
00127     return 1;
00128 
00129   root = xmlDocGetRootElement(doc);
00130   if (root == NULL)
00131     return 1;
00132   //Load in the residue information from the file
00133 
00134   
00135   this->threebody_count = 0;
00136   this->threebody_interactions = NULL;
00137   this->fourbody_count = 0;
00138   this->fourbody_interactions = NULL;
00139           
00140   
00141   fssp_node = get_first_child(root, "dsspData");
00142   read_buffer = (char *)xmlNodeGetContent(fssp_node);
00143   cur_line = read_buffer;
00144 
00145   TargetResidue *tmp_residue = new TargetResidue[5000];
00146   TemplateResidue *tmp_residue_info = new TemplateResidue[5000];
00147   ResidueAtoms *tmp_structure = new ResidueAtoms[5000];
00148   
00149   i=0;
00150   while (*cur_line != 0) {
00151     if (!strncmp(cur_line,"RES", 3)) {
00152                 /*
00153       sscanf(cur_line ,"RES %d%c%c%d%c%c%c%d%f%f%f%f%f%f%f%f%f%f%f",
00154              &tmp_residue[i].F,
00155              &dummyc,
00156              &tmp_residue[i].RS,
00157              &tmp_residue[i].NUM,
00158              &dummyc,&dummyc,
00159              &tmp_residue[i].SS,
00160              &tmp_residue[i].ACC,
00161              &tmp_residue[i].xCa,
00162              &tmp_residue[i].yCa,
00163              &tmp_residue[i].zCa,
00164              &tmp_residue[i].xCb,
00165              &tmp_residue[i].yCb,
00166              &tmp_residue[i].zCb,
00167              &tmp_residue[i].xCm,
00168              &tmp_residue[i].yCm,
00169              &tmp_residue[i].zCm,
00170              &tmp_residue[i].phi,
00171              &tmp_residue[i].psi);
00172 */
00173                 if (24 != sscanf(cur_line ,"RES %c %d %c %d %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f",
00174                            &tmp_residue[i].RS,
00175                            &tmp_residue_info[i].NUM,
00176                            &tmp_residue[i].SS,
00177                            &tmp_residue_info[i].ACC,
00178                                                  &N[0],  &N[1],  &N[2], 
00179                                                  &Ca[0],  &Ca[1],  &Ca[2], 
00180                                                  &C[0],  &C[1],  &C[2], 
00181                                                  &O[0],  &O[1],  &O[2], 
00182                                                  &Cb[0],  &Cb[1],  &Cb[2], 
00183                                                  &Cm[0],  &Cm[1],  &Cm[2],
00184                                                  &tmp_residue[i].phi,
00185                                                  &tmp_residue[i].psi) ) {
00186                         fprintf(stderr, "Template Formating error: \"%s\"", cur_line);
00187                         delete tmp_residue;
00188                         delete tmp_residue_info;
00189                         delete tmp_structure;
00190                         return 1;
00191                 } else {
00192                         for (int j = 0; j < 3; j++) {
00193                                 tmp_structure[i].atom[anN][j] = N[j];
00194                                 tmp_structure[i].atom[anCa][j] = Ca[j];
00195                                 tmp_structure[i].atom[anC][j] = C[j];
00196                                 tmp_structure[i].atom[anO][j] = O[j];
00197                                 tmp_structure[i].atom[anCb][j] = Cb[j];
00198                                 tmp_structure[i].Cm[j] = Cm[j];                         
00199                         }
00200                         tmp_structure[i].atom_name[anN] = anN;
00201                         tmp_structure[i].atom_name[anCa] = anCa;
00202                         tmp_structure[i].atom_name[anC] = anC;
00203                         tmp_structure[i].atom_name[anO] = anO;
00204                         tmp_structure[i].atom_name[anCb] = anCb;
00205 
00206                         tmp_structure[i].bb_num[anN] = anN;
00207                         tmp_structure[i].bb_num[anCa] = anCa;
00208                         tmp_structure[i].bb_num[anC] = anC;
00209                         tmp_structure[i].bb_num[anO] = anO;
00210                         tmp_structure[i].bb_num[anCb] = anCb;
00211                         
00212                         //flag 1 if you want to  get the  CB from the template file
00213 #if 0
00214                         if ( tmp_residue[i].RS != 'G' ) {
00215                                 strcpy(tmp_residue[i].structure.name[anCb], "CB");
00216                                 tmp_structure[i].bb_count = 4;
00217                                 tmp_structure[i].atom_count = 5;
00218                         } else {
00219                                 tmp_structur[i].bb_count = 4;
00220                                 tmp_structure[i].atom_count = 4;
00221                         }
00222 #else 
00223                         tmp_structure[i].atom_count = 4;
00224 #endif
00225                         tmp_structure[i].chain = ' ';
00226                         tmp_structure[i].res_mod = ' ';
00227                         
00228                 //      printf("read: %c %d %c %f %f %f\n", tmp_residue[i].RS, tmp_residue[i].NUM, tmp_residue[i].SS, 
00229                 //                 tmp_residue[i].xCa, tmp_residue[i].yCa, tmp_residue[i].zCa);
00230                 }
00231                 tmp_residue[i].single_del = 0.0;
00232                 tmp_residue[i].single_ins = 0.0;
00233                 
00234                 switch (tmp_residue[i].SS) {
00235                         case 'H': tmp_residue[i].SS='H'; break;
00236                         case 'G': tmp_residue[i].SS='H'; break;
00237                         case 'I': tmp_residue[i].SS='H'; break;
00238                         case 'E': tmp_residue[i].SS='E'; break;
00239                         case 'B': tmp_residue[i].SS='E'; break;
00240                         default : tmp_residue[i].SS='C'; break;
00241                 }
00242                 i++;
00243     }
00244           //advance pointer to next line or end of file
00245     while (cur_line[0] != 0 && cur_line[0] != '\n') {
00246       cur_line++;
00247     }
00248     if (cur_line[0] == '\n')
00249       cur_line++;
00250   }
00251   this->len = i;
00252   free(read_buffer);
00253   
00254   if (this->len == 0) {
00255           delete tmp_residue;
00256           delete tmp_residue_info;
00257           delete tmp_structure;
00258           xmlFreeDoc(doc); 
00259           return 1;
00260   }
00261 
00262 #ifdef ALTIVEC
00263   this->residue=(TemplateResidue *)valloc(this->len * sizeof(TemplateResidue));  
00264   assert(this->residue);
00265 #else
00266   this->residue =(TargetResidue *)malloc(this->len * sizeof(TargetResidue));
00267 #endif  
00268   this->t_residue_info = new TemplateResidue[ this->len ];
00269   this->structure = new ResidueAtoms[ this->len ];
00270   
00271   for (i = 0; i < this->len; i++) {
00272           this->residue[i] = tmp_residue[i]; 
00273           this->t_residue_info[i] = tmp_residue_info[i];
00274           this->structure[i] = tmp_structure[i];
00275   }
00276   delete tmp_residue;
00277   delete tmp_residue_info;
00278   delete tmp_structure;
00279   
00280   profile_node = get_first_child(root, "frequency_matrix");
00281   read_buffer = (char *)xmlNodeGetContent(profile_node);
00282   cur_line = read_buffer;
00283   while (cur_line[0] == '\n') {
00284       cur_line++;
00285   }
00286 
00287   for (i=0; i < this->len; i++) {
00288     sscanf(cur_line, "%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f",
00289            &this->residue[i].FREQ[0],
00290            &this->residue[i].FREQ[1],
00291            &this->residue[i].FREQ[2],
00292            &this->residue[i].FREQ[3],
00293            &this->residue[i].FREQ[4],
00294            &this->residue[i].FREQ[5],
00295            &this->residue[i].FREQ[6],
00296            &this->residue[i].FREQ[7],
00297            &this->residue[i].FREQ[8],
00298            &this->residue[i].FREQ[9],
00299            &this->residue[i].FREQ[10],
00300            &this->residue[i].FREQ[11],
00301            &this->residue[i].FREQ[12],
00302            &this->residue[i].FREQ[13],
00303            &this->residue[i].FREQ[14],
00304            &this->residue[i].FREQ[15],
00305            &this->residue[i].FREQ[16],
00306            &this->residue[i].FREQ[17],
00307            &this->residue[i].FREQ[18],
00308            &this->residue[i].FREQ[19]);
00309           this->residue[i].FREQ[20] = 0;
00310     while (cur_line[0] != '\n') {
00311       cur_line++;
00312     }
00313     if (cur_line[0] == '\n')
00314       cur_line++;
00315     //if we hit the end of the profile info early
00316     if (cur_line[0] == 0 && i < this->len-1) 
00317       return 1;
00318   }
00319   free(read_buffer);
00320 
00321 
00322   //do the log score read in 
00323   
00324   
00325   profile_node = get_first_child(root, "score_matrix");
00326   read_buffer = (char *)xmlNodeGetContent(profile_node);
00327   cur_line = read_buffer;
00328   while (cur_line[0] == '\n') {
00329       cur_line++;
00330   }
00331   
00332   for (i=0; i < this->len; i++) {
00333           long score[20];
00334           sscanf(cur_line, "%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld",
00335                          &score[0],
00336                          &score[1],
00337                          &score[2],
00338                          &score[3],
00339                          &score[4],
00340                          &score[5],
00341                          &score[6],
00342                          &score[7],
00343                          &score[8],
00344                          &score[9],
00345                          &score[10],
00346                          &score[11],
00347                          &score[12],
00348                          &score[13],
00349                          &score[14],
00350                          &score[15],
00351                          &score[16],
00352                          &score[17],
00353                          &score[18],
00354                          &score[19]);
00355           
00356           for (int j = 0; j < 20; j++) 
00357                   this->t_residue_info[i].Score[j] = score[j];
00358           this->t_residue_info[i].Score[20] = 0;
00359           while (cur_line[0] != '\n') {
00360                   cur_line++;
00361           }
00362           if (cur_line[0] == '\n')
00363                   cur_line++;
00364           //if we hit the end of the profile info early
00365           if (cur_line[0] == 0 && i < this->len-1) 
00366                   return 1;
00367   }
00368   free(read_buffer);
00369   
00370   
00371   
00372   char *single_del_text = get_first_child_content(root, "singledel");
00373   if (single_del_text) {
00374           ReadSingleDelStr( single_del_text );
00375           free(single_del_text);
00376   }
00377   char *single_ins_text = get_first_child_content(root, "singleins");
00378   if (single_ins_text) {
00379           ReadSingleInsStr( single_ins_text );
00380           free(single_ins_text);
00381   }
00382   
00383   
00384 //-------------------------------------------------------
00385 /*
00386   xmlNodePtr core_node;
00387   read_buffer = (char *)xmlNodeGetContent(core_node);
00388   cur_line = read_buffer;
00389   sscanf(cur_line, "%s%c%c%d",dummy,&dummyc,&dummyc,&this->core_count);
00390   printf("%d\n",this->core_count);
00391   free(read_buffer);
00392 */
00393 //-------------------------------------------------------
00394   
00395   //get PDB code
00396   pdbcode_node = get_first_child(root, "pdbcode");
00397   read_buffer = (char *)xmlNodeGetContent(pdbcode_node);
00398   for (i = 0; i < 4; i++) {
00399     this->pdbcode[i] = read_buffer[i];
00400   }
00401   this->pdbcode[4] = 0;
00402   free(read_buffer);
00403 
00404   //pdb chain
00405   pdbchain_node = get_first_child(root, "pdbchain");
00406   read_buffer = (char *)xmlNodeGetContent(pdbchain_node);
00407   this->pdbchain = read_buffer[0];
00408   free(read_buffer);
00409 
00410   //template name
00411   this->template_name = (char *)xmlGetProp(root, (xmlChar *)"name");
00412 
00413   //template_loc
00414   template_loc = get_first_child(root, "template_loc");
00415   template_start = get_first_child( template_loc, "start");
00416   template_stop = get_first_child( template_loc, "stop");
00417 
00418   resSeq = get_first_child(template_start, "resSeq");
00419   iCode = get_first_child(template_start, "iCode");
00420   read_buffer = (char *)xmlNodeGetContent(resSeq);
00421   this->start_res = atoi( read_buffer );
00422   free(read_buffer);
00423   read_buffer = (char *)xmlNodeGetContent(iCode);
00424   if (read_buffer[0] == 0)
00425     read_buffer[0] = ' ';
00426   this->start_icode = read_buffer[0];
00427   free(read_buffer);
00428 
00429   resSeq = get_first_child(template_stop, "resSeq");
00430   iCode = get_first_child(template_stop, "iCode");
00431   read_buffer = (char *)xmlNodeGetContent(resSeq);
00432   this->stop_res = atoi( read_buffer );
00433   free(read_buffer);
00434   read_buffer = (char *)xmlNodeGetContent(iCode);
00435   if (read_buffer[0] == 0)
00436     read_buffer[0] = ' ';
00437   this->stop_icode = read_buffer[0];
00438   free(read_buffer);
00439 
00440 
00441   xmlFreeDoc(doc); 
00442 
00443   //  Determine template cores
00444   this->cores=(TemplateCoreDefinition *)malloc(1000*sizeof(TemplateCoreDefinition));
00445   get_interactions(this);
00446   get_cores(this);
00447 
00448 
00450   //Calculate CB distances between all residues
00452 
00453   this->cb_dist = (pValType **)malloc(sizeof(pValType *) * this->len);
00454   this->cb_dist[0] = (pValType *)malloc(sizeof(pValType) * this->len * this->len);
00455   for (i = 0; i < this->len; i++) {
00456     this->cb_dist[i] = this->cb_dist[0] + i * this->len;
00457   }
00458 
00459 
00460   for (i = 0; i < this->len; i++) {
00461           int i_cb = this->structure[i].bb_num[ anCb ];
00462           for (j = i; j < this->len; j++) {
00463                   int j_cb = this->structure[j].bb_num[ anCb ];
00464                   if ( i_cb != -1 && j_cb != -1 ) {
00465                           tmp_val = sqrt( pow(this->structure[i].atom[ i_cb ][0] - 
00466                                                                   this->structure[j].atom[ j_cb ][0], 2) +
00467                                                           pow(this->structure[i].atom[ i_cb ][1] - 
00468                                                                   this->structure[j].atom[ j_cb ][1], 2) +
00469                                                           pow(this->structure[i].atom[ i_cb ][2] - 
00470                                                                   this->structure[j].atom[ j_cb ][2], 2) );      
00471                   } else {
00472                           tmp_val = 10000;
00473                   }
00474                   this->cb_dist[i][j] = tmp_val;
00475                   this->cb_dist[j][i] = tmp_val;
00476           }
00477   }
00478 
00479   return 0;
00480 }
00481 
00482 
00483 
00484 
00485 //--------------------------------------------------------------------
00486 void get_interactions(TemplateStruct *template_data) {
00487         int i,j,cnt;
00488         float x,y,z,distance;
00489         cnt = 0;
00490         
00491         for (i=0;i<template_data->len;i++) {
00492                 for (j=i+MinDistTwoBody;j<template_data->len;j++) {                     
00493                         x = template_data->structure[i].atom[anCb][0] - template_data->structure[j].atom[anCb][0];
00494                         y = template_data->structure[i].atom[anCb][1] - template_data->structure[j].atom[anCb][1];
00495                         z = template_data->structure[i].atom[anCb][2] - template_data->structure[j].atom[anCb][2];
00496                         
00497                         distance = x*x + y*y + z*z;
00498                         
00499                         if (distance <= PairDistCutoff) {
00500                                 cnt += 1;
00501                         }
00502                 }
00503         }
00504         
00505         template_data->twobody_count = cnt;     
00506         template_data->twobody_interactions = (InteractionElement *)malloc(template_data->twobody_count*sizeof(InteractionElement));    
00507         cnt = 0;
00508         
00509         for (i=0;i<template_data->len;i++) {
00510                 for (j=i+MinDistTwoBody;j<template_data->len;j++) {
00511                         
00512                         x = template_data->structure[i].atom[anCb][0] - template_data->structure[j].atom[anCb][0];
00513                         y = template_data->structure[i].atom[anCb][1] - template_data->structure[j].atom[anCb][1];
00514                         z = template_data->structure[i].atom[anCb][2] - template_data->structure[j].atom[anCb][2];                      
00515                         distance = x*x + y*y + z*z;                     
00516                         if (distance <= PairDistCutoff) {
00517                                 template_data->twobody_interactions[cnt].member_count = 2;
00518                                 template_data->twobody_interactions[cnt].residue_number[0] = i;
00519                                 template_data->twobody_interactions[cnt].residue_number[1] = j;
00520                                 cnt++;
00521                         }
00522                 }
00523         }
00524 }
00525 
00526 //--------------------------------------------------------------------
00527 
00528 //my $MinHelixLen = 5;   # old 4
00529 //my $MinSheetLen = 2;   # old 3
00530 
00531 #define cMinHelix  5
00532 #define cMinStrand  3
00533 
00534 //#define DEBUG 1
00535 
00536 void get_cores(TemplateStruct *template_data) {
00537   char last_state='C';
00538   int cur_core=0;
00539 
00540   //first identify all possible cores
00541   for (int i=0; i < template_data->len; i++) {
00542     if (last_state != 'C' && last_state != template_data->residue[i].SS) {
00543       if ( cur_core > 0)
00544               template_data->cores[cur_core-1].len = i - template_data->cores[cur_core-1].head;
00545     }
00546     if ( (template_data->residue[i].SS == 'H' || template_data->residue[i].SS == 'E') && 
00547         last_state != template_data->residue[i].SS) {
00548       template_data->cores[cur_core].head=i;
00549       cur_core++;
00550     }
00551     last_state = template_data->residue[i].SS;
00552   }
00553   if (last_state != 'C') {
00554           template_data->cores[cur_core-1].len = template_data->len - template_data->cores[cur_core-1].head;
00555   }
00556   template_data->core_count=cur_core;
00557 
00558   
00559    
00560   //remove cores that are too short
00561   for (int i = 0; i < template_data->core_count; i++) {
00562 #ifdef DEBUG
00563           fprintf(stderr, "Core %d (%c): %d\n", i, template_data->residue[ template_data->cores[i].head ].SS,  template_data->cores[i].len );
00564 #endif
00565           if ( template_data->residue[ template_data->cores[i].head ].SS == 'H' && template_data->cores[i].len < cMinHelix ) {
00566                   for (int j = i; j < template_data->core_count-1; j++) {
00567                           template_data->cores[j] = template_data->cores[j+1];
00568                   }
00569                   template_data->core_count--;
00570                   i--;
00571           } else if ( template_data->residue[ template_data->cores[i].head ].SS == 'E' && template_data->cores[i].len < cMinStrand ) {
00572                   for (int j = i; j < template_data->core_count-1; j++) {
00573                           template_data->cores[j] = template_data->cores[j+1];
00574                   }
00575                   template_data->core_count--;
00576                   i--;
00577           }
00578   }
00579   
00580 #if 1
00581 
00582   //remove cores with not enough contacts  
00583   int core_residue_count = 0, core_interaction_count = 0;
00584   for (int i = 0; i < template_data->core_count; i++) {
00585           core_residue_count += template_data->cores[i].len;
00586   }
00587   for (int i = 0; i < template_data->core_count; i++) {
00588           for (int j = 0; j < template_data->core_count; j++) {
00589                   if ( i != j) {
00590                           for (int k= 0; k < template_data->twobody_count; k++) {
00591                                   if ( template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[i].head &&
00592                                            template_data->twobody_interactions[k].residue_number[0] <  template_data->cores[i].head + template_data->cores[i].len &&
00593                                            template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[j].head &&
00594                                            template_data->twobody_interactions[k].residue_number[1] <  template_data->cores[j].head + template_data->cores[i].len ) {
00595                                           core_interaction_count++;                                       
00596                                   }
00597                           }
00598                   }
00599           }       
00600   }  
00601   float core_interaction_ratio = (float)core_interaction_count / (float) core_residue_count;  
00602 #ifdef DEBUG 
00603         fprintf(stderr, "protein core interaction ratio: %f\n", core_interaction_ratio);
00604 #endif
00605 
00606   for (int i = 0; i < template_data->core_count; i++) {
00607           long cur_interaction_count = 0;
00608 #ifdef DEBUG
00609         fprintf(stderr, "core %d\n", i);
00610 #endif
00611           for (int j = 0; j < template_data->core_count; j++) {
00612                   if ( i != j) {
00613                           for (int k= 0; k < template_data->twobody_count; k++) {
00614                                   if (( template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[i].head &&
00615                                            template_data->twobody_interactions[k].residue_number[0] <  template_data->cores[i].head + template_data->cores[i].len &&
00616                                            template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[j].head &&
00617                                            template_data->twobody_interactions[k].residue_number[1] <  template_data->cores[j].head + template_data->cores[i].len) ||
00618                                         ( template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[i].head &&
00619                                            template_data->twobody_interactions[k].residue_number[1] <  template_data->cores[i].head + template_data->cores[i].len &&
00620                                            template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[j].head &&
00621                                            template_data->twobody_interactions[k].residue_number[0] <  template_data->cores[j].head + template_data->cores[i].len)
00622 
00623  ) {
00624                                           cur_interaction_count++;      
00625 #ifdef DEBUG
00626                                           fprintf(stderr, "\t%d -- %d\n", template_data->twobody_interactions[k].residue_number[0],
00627                                                                 template_data->twobody_interactions[k].residue_number[1] );
00628 #endif                            
00629                                   }
00630                           }
00631                   }
00632           }       
00633 #ifdef DEBUG
00634         fprintf(stderr, "\tinteraction ratio %f\n", ( (float) cur_interaction_count / (float) template_data->cores[i].len ) );
00635 #endif
00636           if ( 0.3 * core_interaction_ratio > ( (float) cur_interaction_count / (float) template_data->cores[i].len ) ) {
00637                   //removing the core
00638                   for (int j = i; j < template_data->core_count-1; j++) {
00639                           template_data->cores[j] = template_data->cores[j+1];
00640                   }
00641                   template_data->core_count--;          
00642                   i--;
00643           }       
00644   }
00645   
00646 
00647   //remove terminal residues that don't have enough contacts  
00648   for (int i = 0; i < template_data->core_count; i++) {
00649           long cur_interaction_count = 0;
00650           long residue_interaction_count[ template_data->cores[i].len ];
00651           for (int j = 0; j < template_data->cores[i].len; j++) {
00652                   residue_interaction_count[j] = 0;
00653           }
00654           for (int j = 0; j < template_data->core_count; j++) {
00655             if ( i != j) {
00656               for (int k= 0; k < template_data->twobody_count; k++) {
00657                 if ( template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[i].head &&
00658                      template_data->twobody_interactions[k].residue_number[0] <  template_data->cores[i].head + template_data->cores[i].len &&
00659                      template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[j].head &&
00660                      template_data->twobody_interactions[k].residue_number[1] <  template_data->cores[j].head + template_data->cores[j].len 
00661                      ) {
00662                   cur_interaction_count++;
00663                   residue_interaction_count[ template_data->twobody_interactions[k].residue_number[0] - template_data->cores[i].head ]++;                                         
00664                 } else if (template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[i].head &&
00665                            template_data->twobody_interactions[k].residue_number[1] <  template_data->cores[i].head + template_data->cores[i].len &&
00666                            template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[j].head &&
00667                            template_data->twobody_interactions[k].residue_number[0] <  template_data->cores[j].head + template_data->cores[j].len
00668                            ) {
00669                   cur_interaction_count++;
00670                   residue_interaction_count[ template_data->twobody_interactions[k].residue_number[1] - template_data->cores[i].head ]++;
00671                 }
00672               }
00673             }
00674           }
00675 
00676 #ifdef DEBUG
00677           
00678           for (int k = 0; k < template_data->cores[i].len; k++) {
00679             fprintf(stderr, "%d ", residue_interaction_count[k]);
00680           }
00681 #endif
00682           char removed = 0;
00683           long total_removed = 0, new_head = 0, new_len = template_data->cores[i].len;  
00684           do {
00685                   removed = 0;
00686                   if ( residue_interaction_count[ new_head ] < residue_interaction_count[ new_len-1 ] ) {
00687                           if (  total_removed + residue_interaction_count[new_head] < 0.1 * (float) cur_interaction_count ) {
00688                                   total_removed += residue_interaction_count[new_head];
00689                                   new_head++;
00690                                   removed = 1;
00691                           }
00692                   } else {
00693                           if (  total_removed + residue_interaction_count[new_len-1] < 0.1 * (float) cur_interaction_count ) {
00694                                   total_removed += residue_interaction_count[new_len-1];
00695                                   new_len--;
00696                                   removed = 1;
00697                           }
00698                   }
00699           } while (removed);    
00700 #ifdef DEBUG 
00701           fprintf(stderr, "Core %d (%c)  head %d (%d)  len: %d (%d )\n", i, template_data->residue[ template_data->cores[i].head ].SS,
00702                          template_data->cores[i].head + new_head, template_data->cores[i].head,
00703                          new_len - new_head,  template_data->cores[i].len );
00704 #endif  
00705           template_data->cores[i].head += new_head;
00706           template_data->cores[i].len = new_len - new_head;  
00707           
00708   }
00709   
00710   //again, remove cores that are too short
00711   for (int i = 0; i < template_data->core_count; i++) {
00712           if ( template_data->residue[ template_data->cores[i].head ].SS == 'H' && template_data->cores[i].len < cMinHelix-1 ) {
00713                   for (int j = i; j < template_data->core_count-1; j++) {
00714                           template_data->cores[j] = template_data->cores[j+1];
00715                   }
00716                   template_data->core_count--;
00717                   i--;
00718           } else if ( template_data->residue[ template_data->cores[i].head ].SS == 'E' && template_data->cores[i].len < cMinStrand-1 ) {
00719                   for (int j = i; j < template_data->core_count-1; j++) {
00720                           template_data->cores[j] = template_data->cores[j+1];
00721                   }
00722                   template_data->core_count--;
00723                   i--;
00724           }
00725   }
00726   
00727 #endif  
00728 
00729   
00730   
00731   
00732   //label residues by their core numbers
00733   for (int i = 0; i < template_data->len; i++) {
00734     template_data->t_residue_info[i].core_num = -1;
00735   }
00736   for (int i = 0; i < template_data->core_count; i++) {
00737           for (int j = 0; j < template_data->cores[i].len; j++) {
00738                   template_data->t_residue_info[ template_data->cores[i].head + j ].core_num = i;
00739           }
00740   }  
00741   
00742   //go through and label interactions by core #  
00743   for (int i = 0; i < template_data->core_count; i++) {
00744           for (int j = 0; j < template_data->twobody_count; j++) {
00745                   for (int k = 0; k < 2; k++) {
00746                           if ( template_data->twobody_interactions[j].residue_number[k] >= template_data->cores[i].head &&
00747                                    template_data->twobody_interactions[j].residue_number[k] < template_data->cores[i].head + template_data->cores[i].len ) {
00748                                   template_data->twobody_interactions[j].core_number[k] = i;
00749                           }
00750                   }
00751           }
00752   }
00753 }
00754 
00755 /*
00756 
00757 void ProteinLinearAtoms::extract_residueatom_array( ResidueAtoms *r_atoms, int count) { 
00758         int counter = 0;
00759         for (int i= 0; i < count; i++) {
00760                 counter += r_atoms[i].atom_count;
00761         }       
00762         atom_count = counter;
00763         atom_loc = (vec3 *)malloc(sizeof(vec3) * atom_count );
00764         residue_num = (int *)malloc(sizeof(int) * atom_count );
00765         atom_type = (int *)malloc(sizeof(int) * atom_count );
00766         temp_factor = (float *)malloc(sizeof(float) * atom_count );     
00767         counter = 0;
00768         for (int i= 0; i < count; i++) {
00769                 for (int j = 0; j < r_atoms[i].atom_count; j++) {
00770                         atom_loc[ counter ][0] = r_atoms[i].atom[j][0];
00771                         atom_loc[ counter ][1] = r_atoms[i].atom[j][1];
00772                         atom_loc[ counter ][2] = r_atoms[i].atom[j][2];
00773                         residue_num[ counter ] = i;
00774                         temp_factor[ counter ] = r_atoms[i].temp_factor[j];
00775                         counter++;
00776                 }
00777         }       
00778 }
00779 
00780 */
00781 
00782 AlignFeatures::AlignFeatures(void) {
00783         for ( int i =0; i < f_count; i++ ) {
00784                 is_set[i] = 0;
00785         }
00786 }
00787 
00788 
00789 void AlignFeatures::CalcFeatures( class TemplateStruct *template_data, 
00790                                                                   class TargetStruct *target_data,  
00791                                                                   class AlignmentStruct *align ) {      
00792         
00793         
00794         //alignment stuff
00795         feature[ fn_align ] = 0;
00796         feature[ fn_ident ] = 0;
00797         for (int i = 0; i < target_data->len; i++) {
00798                 if (align->GetTargetAlign( i )!= -1 ) {
00799                         feature[ fn_align ]++;
00800                         if ( target_data->residue[i].RS == template_data->residue[ align->GetTargetAlign( i ) ].RS ) {
00801                                 feature[fn_ident]++;
00802                         }
00803                 }
00804         }
00805         feature[ fr_align ] = feature[fn_align] / (float)target_data->len;
00806         feature[ fr_ident ] = feature[fn_ident] / (float)target_data->len;      
00807         is_set[ fr_align ] = 1;
00808         is_set[ fn_align ] = 1;
00809         is_set[ fr_ident ] = 1;
00810         is_set[ fn_ident ] = 1;
00811                 
00812         //get number of aligned residue pairs
00813         feature[ fn_align_pair ] = 0;
00814         long pair_total = 0;
00815         for (int i = 0; i < template_data->twobody_count; i++) {
00816                 if ( template_data->twobody_interactions[i].core_number[0] != -1 &&
00817                          template_data->twobody_interactions[i].core_number[1] != -1 ) {
00818                         pair_total++;
00819                         if (align->GetTemplateAlign( template_data->twobody_interactions[i].residue_number[0] ) != -1 &&
00820                                 align->GetTemplateAlign( template_data->twobody_interactions[i].residue_number[1] ) != -1 ) {
00821                                 feature[ fn_align_pair ]++;
00822                         }
00823                 }
00824         }
00825         feature[ fr_align_pair ] = feature[ fn_align_pair ] / (double) pair_total;
00826         is_set[ fr_align_pair ] = 1;
00827         is_set[ fn_align_pair ] = 1;
00828         
00829         //core alignment stuff
00830         //BUG: this assumes the whole core is aligned...
00831         feature[ fn_align_core_res ] = 0;
00832         feature[ fn_align_core ] = 0;
00833         for (int j = 0; j < template_data->core_count; j++) {
00834                 if ( align->GetTemplateAlign( template_data->cores[j].head ) != -1 ) {
00835                         feature[ fn_align_core ]++;
00836                 }               
00837         }
00838         for (int j = 0; j < template_data->core_count; j++) {
00839                 feature[ fn_align_core_res ] += template_data->cores[j].len;            
00840         }
00841         is_set[ fn_align_core_res ] = 1;
00842         is_set[ fn_align_core ] = 1;
00843         
00844         
00845         
00846         
00847         //contact stuff
00848         feature[ fn_contact ] = 0;
00849         feature[ fn_halfcontact ] = 0;
00850         int total_contact = 0;
00851         for ( int i = 0; i < template_data->len; i++ ) {
00852                 ResidueAtoms *res_1 = &template_data->structure[i];
00853                 int align_1 = align->GetTemplateAlign(i);
00854                 for ( int j = i+1; j < template_data->len; j++ ) {
00855                         ResidueAtoms *res_2 = &template_data->structure[j];
00856                         int align_2 = align->GetTemplateAlign(j);
00857                         float dist = smDistance( res_1->atom[ res_1->bb_num[ anCa ] ], 
00858                                                                          res_2->atom[ res_2->bb_num[ anCa ] ] );
00859                         if ( dist < 7.6 ) {
00860                                 total_contact++;
00861                                 if ( (align_1 == -1 && align_2 != -1) || (align_1 != -1 && align_2 == -1) ) {
00862                                         feature[ fn_halfcontact ]++;
00863                                 }
00864                                 if ( (align_1 != -1 && align_2 != -1)  ) {
00865                                         feature[ fn_contact ]++;
00866                                 }                                               
00867                         }
00868                 }
00869         }       
00870         feature[ fr_contact ] = feature[ fn_contact ] / (float) total_contact;
00871         feature[ fr_halfcontact ] = feature[ fn_halfcontact ] / (float) total_contact;
00872         is_set[ fr_contact ] = 1;
00873         is_set[ fn_contact ] = 1;
00874         is_set[ fr_halfcontact ] = 1;
00875         is_set[ fn_halfcontact ] = 1;   
00876 }
00877 
00878 

Generated on Wed Apr 11 16:50:50 2007 for open_prospect by  doxygen 1.4.6