00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <ctype.h>
00006 #include <assert.h>
00007
00008
00009 #include <libxml/tree.h>
00010 #include <libxml/parser.h>
00011 #include <libxml/xpath.h>
00012 #include <libxml/xpathInternals.h>
00013
00014 #include "open_prospect.h"
00015 #include "sm_matrix.h"
00016
00017
00018
00019 void get_cores(TemplateStruct *template_data);
00020 void get_interactions(TemplateStruct *template_data);
00021
00022
00023
00024
00025
00026
00027 xmlNodePtr get_first_child(xmlNodePtr parent, char *str) {
00028 xmlNodePtr curnode;
00029
00030 curnode = parent->children;
00031 while (curnode != NULL) {
00032 if (!strcmp((char *)curnode->name, str)) {
00033 return curnode;
00034 }
00035 curnode = curnode->next;
00036 }
00037 return NULL;
00038 }
00039
00040
00041 char *get_first_child_content( xmlNodePtr parent, char *str) {
00042 xmlNodePtr node = get_first_child(parent, str);
00043 if ( node == NULL )
00044 return NULL;
00045 return (char *)xmlNodeGetContent( node );
00046 }
00047
00048
00049
00050 TemplateStruct::TemplateStruct() : TargetStruct() {
00051 this->len = 0;
00052 this->cb_dist = NULL;
00053 this->template_name = NULL;
00054 this->residue = NULL;
00055 this->cores = NULL;
00056 this->core_count = 0;
00057 this->t_residue_info = NULL;
00058 this->twobody_interactions = NULL;
00059 }
00060
00061
00062 TemplateStruct::~TemplateStruct() {
00063 FreeMem();
00064 }
00065
00066
00067 void TemplateStruct::SetLen(int new_len) {
00068 if ( new_len != len ) {
00069 TargetStruct::SetLen( new_len );
00070 t_residue_info = new TemplateResidue[ new_len ];
00071 }
00072 }
00073
00074
00075 void TemplateStruct::FreeMem(void) {
00076 TargetStruct::FreeMem();
00077 if (this->template_name != NULL)
00078 free(this->template_name);
00079 if (this->cb_dist) {
00080 free(this->cb_dist[0]);
00081 free(this->cb_dist);
00082 }
00083 this->len = 0;
00084 this->cb_dist = NULL;
00085 this->template_name = NULL;
00086 this->residue = NULL;
00087 }
00088
00089 void TemplateStruct::CopyScoreTable( pValType **scores ) {
00090 for (int i = 0; i < len; i++ ) {
00091 int rs = AA1toAANum( residue[i].RS );
00092 for ( int j = 0; j < 20; j++ ) {
00093 t_residue_info[i].Score[j] = scores[ rs ][ j ];
00094 }
00095 }
00096 }
00097
00098 void TemplateStruct::CopyScoreTable( pValType scores[21][21] ) {
00099 for (int i = 0; i < len; i++ ) {
00100 int rs = AA1toAANum( residue[i].RS );
00101 for ( int j = 0; j < 20; j++ ) {
00102 t_residue_info[i].Score[j] = scores[ rs ][ j ];
00103 }
00104 }
00105 }
00106
00107
00108
00109 int TemplateStruct::SetSequence(char *seq) {
00110 TargetStruct::SetSequence( seq );
00111 t_residue_info = (TemplateResidue *)malloc(sizeof(TemplateResidue) * len );
00112 }
00113
00114 int TemplateStruct::LoadFile( char *template_path ) {
00115 xmlDocPtr doc;
00116 xmlNodePtr root, fssp_node, profile_node, pdbcode_node, pdbchain_node, resSeq, iCode,
00117 template_loc, template_start, template_stop;
00118 int i, j;
00119 float N[3], Ca[3], C[3], O[3], Cb[3], Cm[3];
00120 char *read_buffer, *cur_line;
00121 pValType tmp_val;
00122
00123
00124
00125 doc = xmlParseFile(template_path);
00126 if (doc == NULL)
00127 return 1;
00128
00129 root = xmlDocGetRootElement(doc);
00130 if (root == NULL)
00131 return 1;
00132
00133
00134
00135 this->threebody_count = 0;
00136 this->threebody_interactions = NULL;
00137 this->fourbody_count = 0;
00138 this->fourbody_interactions = NULL;
00139
00140
00141 fssp_node = get_first_child(root, "dsspData");
00142 read_buffer = (char *)xmlNodeGetContent(fssp_node);
00143 cur_line = read_buffer;
00144
00145 TargetResidue *tmp_residue = new TargetResidue[5000];
00146 TemplateResidue *tmp_residue_info = new TemplateResidue[5000];
00147 ResidueAtoms *tmp_structure = new ResidueAtoms[5000];
00148
00149 i=0;
00150 while (*cur_line != 0) {
00151 if (!strncmp(cur_line,"RES", 3)) {
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173 if (24 != sscanf(cur_line ,"RES %c %d %c %d %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f",
00174 &tmp_residue[i].RS,
00175 &tmp_residue_info[i].NUM,
00176 &tmp_residue[i].SS,
00177 &tmp_residue_info[i].ACC,
00178 &N[0], &N[1], &N[2],
00179 &Ca[0], &Ca[1], &Ca[2],
00180 &C[0], &C[1], &C[2],
00181 &O[0], &O[1], &O[2],
00182 &Cb[0], &Cb[1], &Cb[2],
00183 &Cm[0], &Cm[1], &Cm[2],
00184 &tmp_residue[i].phi,
00185 &tmp_residue[i].psi) ) {
00186 fprintf(stderr, "Template Formating error: \"%s\"", cur_line);
00187 delete tmp_residue;
00188 delete tmp_residue_info;
00189 delete tmp_structure;
00190 return 1;
00191 } else {
00192 for (int j = 0; j < 3; j++) {
00193 tmp_structure[i].atom[anN][j] = N[j];
00194 tmp_structure[i].atom[anCa][j] = Ca[j];
00195 tmp_structure[i].atom[anC][j] = C[j];
00196 tmp_structure[i].atom[anO][j] = O[j];
00197 tmp_structure[i].atom[anCb][j] = Cb[j];
00198 tmp_structure[i].Cm[j] = Cm[j];
00199 }
00200 tmp_structure[i].atom_name[anN] = anN;
00201 tmp_structure[i].atom_name[anCa] = anCa;
00202 tmp_structure[i].atom_name[anC] = anC;
00203 tmp_structure[i].atom_name[anO] = anO;
00204 tmp_structure[i].atom_name[anCb] = anCb;
00205
00206 tmp_structure[i].bb_num[anN] = anN;
00207 tmp_structure[i].bb_num[anCa] = anCa;
00208 tmp_structure[i].bb_num[anC] = anC;
00209 tmp_structure[i].bb_num[anO] = anO;
00210 tmp_structure[i].bb_num[anCb] = anCb;
00211
00212
00213 #if 0
00214 if ( tmp_residue[i].RS != 'G' ) {
00215 strcpy(tmp_residue[i].structure.name[anCb], "CB");
00216 tmp_structure[i].bb_count = 4;
00217 tmp_structure[i].atom_count = 5;
00218 } else {
00219 tmp_structur[i].bb_count = 4;
00220 tmp_structure[i].atom_count = 4;
00221 }
00222 #else
00223 tmp_structure[i].atom_count = 4;
00224 #endif
00225 tmp_structure[i].chain = ' ';
00226 tmp_structure[i].res_mod = ' ';
00227
00228
00229
00230 }
00231 tmp_residue[i].single_del = 0.0;
00232 tmp_residue[i].single_ins = 0.0;
00233
00234 switch (tmp_residue[i].SS) {
00235 case 'H': tmp_residue[i].SS='H'; break;
00236 case 'G': tmp_residue[i].SS='H'; break;
00237 case 'I': tmp_residue[i].SS='H'; break;
00238 case 'E': tmp_residue[i].SS='E'; break;
00239 case 'B': tmp_residue[i].SS='E'; break;
00240 default : tmp_residue[i].SS='C'; break;
00241 }
00242 i++;
00243 }
00244
00245 while (cur_line[0] != 0 && cur_line[0] != '\n') {
00246 cur_line++;
00247 }
00248 if (cur_line[0] == '\n')
00249 cur_line++;
00250 }
00251 this->len = i;
00252 free(read_buffer);
00253
00254 if (this->len == 0) {
00255 delete tmp_residue;
00256 delete tmp_residue_info;
00257 delete tmp_structure;
00258 xmlFreeDoc(doc);
00259 return 1;
00260 }
00261
00262 #ifdef ALTIVEC
00263 this->residue=(TemplateResidue *)valloc(this->len * sizeof(TemplateResidue));
00264 assert(this->residue);
00265 #else
00266 this->residue =(TargetResidue *)malloc(this->len * sizeof(TargetResidue));
00267 #endif
00268 this->t_residue_info = new TemplateResidue[ this->len ];
00269 this->structure = new ResidueAtoms[ this->len ];
00270
00271 for (i = 0; i < this->len; i++) {
00272 this->residue[i] = tmp_residue[i];
00273 this->t_residue_info[i] = tmp_residue_info[i];
00274 this->structure[i] = tmp_structure[i];
00275 }
00276 delete tmp_residue;
00277 delete tmp_residue_info;
00278 delete tmp_structure;
00279
00280 profile_node = get_first_child(root, "frequency_matrix");
00281 read_buffer = (char *)xmlNodeGetContent(profile_node);
00282 cur_line = read_buffer;
00283 while (cur_line[0] == '\n') {
00284 cur_line++;
00285 }
00286
00287 for (i=0; i < this->len; i++) {
00288 sscanf(cur_line, "%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f%f",
00289 &this->residue[i].FREQ[0],
00290 &this->residue[i].FREQ[1],
00291 &this->residue[i].FREQ[2],
00292 &this->residue[i].FREQ[3],
00293 &this->residue[i].FREQ[4],
00294 &this->residue[i].FREQ[5],
00295 &this->residue[i].FREQ[6],
00296 &this->residue[i].FREQ[7],
00297 &this->residue[i].FREQ[8],
00298 &this->residue[i].FREQ[9],
00299 &this->residue[i].FREQ[10],
00300 &this->residue[i].FREQ[11],
00301 &this->residue[i].FREQ[12],
00302 &this->residue[i].FREQ[13],
00303 &this->residue[i].FREQ[14],
00304 &this->residue[i].FREQ[15],
00305 &this->residue[i].FREQ[16],
00306 &this->residue[i].FREQ[17],
00307 &this->residue[i].FREQ[18],
00308 &this->residue[i].FREQ[19]);
00309 this->residue[i].FREQ[20] = 0;
00310 while (cur_line[0] != '\n') {
00311 cur_line++;
00312 }
00313 if (cur_line[0] == '\n')
00314 cur_line++;
00315
00316 if (cur_line[0] == 0 && i < this->len-1)
00317 return 1;
00318 }
00319 free(read_buffer);
00320
00321
00322
00323
00324
00325 profile_node = get_first_child(root, "score_matrix");
00326 read_buffer = (char *)xmlNodeGetContent(profile_node);
00327 cur_line = read_buffer;
00328 while (cur_line[0] == '\n') {
00329 cur_line++;
00330 }
00331
00332 for (i=0; i < this->len; i++) {
00333 long score[20];
00334 sscanf(cur_line, "%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld%ld",
00335 &score[0],
00336 &score[1],
00337 &score[2],
00338 &score[3],
00339 &score[4],
00340 &score[5],
00341 &score[6],
00342 &score[7],
00343 &score[8],
00344 &score[9],
00345 &score[10],
00346 &score[11],
00347 &score[12],
00348 &score[13],
00349 &score[14],
00350 &score[15],
00351 &score[16],
00352 &score[17],
00353 &score[18],
00354 &score[19]);
00355
00356 for (int j = 0; j < 20; j++)
00357 this->t_residue_info[i].Score[j] = score[j];
00358 this->t_residue_info[i].Score[20] = 0;
00359 while (cur_line[0] != '\n') {
00360 cur_line++;
00361 }
00362 if (cur_line[0] == '\n')
00363 cur_line++;
00364
00365 if (cur_line[0] == 0 && i < this->len-1)
00366 return 1;
00367 }
00368 free(read_buffer);
00369
00370
00371
00372 char *single_del_text = get_first_child_content(root, "singledel");
00373 if (single_del_text) {
00374 ReadSingleDelStr( single_del_text );
00375 free(single_del_text);
00376 }
00377 char *single_ins_text = get_first_child_content(root, "singleins");
00378 if (single_ins_text) {
00379 ReadSingleInsStr( single_ins_text );
00380 free(single_ins_text);
00381 }
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396 pdbcode_node = get_first_child(root, "pdbcode");
00397 read_buffer = (char *)xmlNodeGetContent(pdbcode_node);
00398 for (i = 0; i < 4; i++) {
00399 this->pdbcode[i] = read_buffer[i];
00400 }
00401 this->pdbcode[4] = 0;
00402 free(read_buffer);
00403
00404
00405 pdbchain_node = get_first_child(root, "pdbchain");
00406 read_buffer = (char *)xmlNodeGetContent(pdbchain_node);
00407 this->pdbchain = read_buffer[0];
00408 free(read_buffer);
00409
00410
00411 this->template_name = (char *)xmlGetProp(root, (xmlChar *)"name");
00412
00413
00414 template_loc = get_first_child(root, "template_loc");
00415 template_start = get_first_child( template_loc, "start");
00416 template_stop = get_first_child( template_loc, "stop");
00417
00418 resSeq = get_first_child(template_start, "resSeq");
00419 iCode = get_first_child(template_start, "iCode");
00420 read_buffer = (char *)xmlNodeGetContent(resSeq);
00421 this->start_res = atoi( read_buffer );
00422 free(read_buffer);
00423 read_buffer = (char *)xmlNodeGetContent(iCode);
00424 if (read_buffer[0] == 0)
00425 read_buffer[0] = ' ';
00426 this->start_icode = read_buffer[0];
00427 free(read_buffer);
00428
00429 resSeq = get_first_child(template_stop, "resSeq");
00430 iCode = get_first_child(template_stop, "iCode");
00431 read_buffer = (char *)xmlNodeGetContent(resSeq);
00432 this->stop_res = atoi( read_buffer );
00433 free(read_buffer);
00434 read_buffer = (char *)xmlNodeGetContent(iCode);
00435 if (read_buffer[0] == 0)
00436 read_buffer[0] = ' ';
00437 this->stop_icode = read_buffer[0];
00438 free(read_buffer);
00439
00440
00441 xmlFreeDoc(doc);
00442
00443
00444 this->cores=(TemplateCoreDefinition *)malloc(1000*sizeof(TemplateCoreDefinition));
00445 get_interactions(this);
00446 get_cores(this);
00447
00448
00450
00452
00453 this->cb_dist = (pValType **)malloc(sizeof(pValType *) * this->len);
00454 this->cb_dist[0] = (pValType *)malloc(sizeof(pValType) * this->len * this->len);
00455 for (i = 0; i < this->len; i++) {
00456 this->cb_dist[i] = this->cb_dist[0] + i * this->len;
00457 }
00458
00459
00460 for (i = 0; i < this->len; i++) {
00461 int i_cb = this->structure[i].bb_num[ anCb ];
00462 for (j = i; j < this->len; j++) {
00463 int j_cb = this->structure[j].bb_num[ anCb ];
00464 if ( i_cb != -1 && j_cb != -1 ) {
00465 tmp_val = sqrt( pow(this->structure[i].atom[ i_cb ][0] -
00466 this->structure[j].atom[ j_cb ][0], 2) +
00467 pow(this->structure[i].atom[ i_cb ][1] -
00468 this->structure[j].atom[ j_cb ][1], 2) +
00469 pow(this->structure[i].atom[ i_cb ][2] -
00470 this->structure[j].atom[ j_cb ][2], 2) );
00471 } else {
00472 tmp_val = 10000;
00473 }
00474 this->cb_dist[i][j] = tmp_val;
00475 this->cb_dist[j][i] = tmp_val;
00476 }
00477 }
00478
00479 return 0;
00480 }
00481
00482
00483
00484
00485
00486 void get_interactions(TemplateStruct *template_data) {
00487 int i,j,cnt;
00488 float x,y,z,distance;
00489 cnt = 0;
00490
00491 for (i=0;i<template_data->len;i++) {
00492 for (j=i+MinDistTwoBody;j<template_data->len;j++) {
00493 x = template_data->structure[i].atom[anCb][0] - template_data->structure[j].atom[anCb][0];
00494 y = template_data->structure[i].atom[anCb][1] - template_data->structure[j].atom[anCb][1];
00495 z = template_data->structure[i].atom[anCb][2] - template_data->structure[j].atom[anCb][2];
00496
00497 distance = x*x + y*y + z*z;
00498
00499 if (distance <= PairDistCutoff) {
00500 cnt += 1;
00501 }
00502 }
00503 }
00504
00505 template_data->twobody_count = cnt;
00506 template_data->twobody_interactions = (InteractionElement *)malloc(template_data->twobody_count*sizeof(InteractionElement));
00507 cnt = 0;
00508
00509 for (i=0;i<template_data->len;i++) {
00510 for (j=i+MinDistTwoBody;j<template_data->len;j++) {
00511
00512 x = template_data->structure[i].atom[anCb][0] - template_data->structure[j].atom[anCb][0];
00513 y = template_data->structure[i].atom[anCb][1] - template_data->structure[j].atom[anCb][1];
00514 z = template_data->structure[i].atom[anCb][2] - template_data->structure[j].atom[anCb][2];
00515 distance = x*x + y*y + z*z;
00516 if (distance <= PairDistCutoff) {
00517 template_data->twobody_interactions[cnt].member_count = 2;
00518 template_data->twobody_interactions[cnt].residue_number[0] = i;
00519 template_data->twobody_interactions[cnt].residue_number[1] = j;
00520 cnt++;
00521 }
00522 }
00523 }
00524 }
00525
00526
00527
00528
00529
00530
00531 #define cMinHelix 5
00532 #define cMinStrand 3
00533
00534
00535
00536 void get_cores(TemplateStruct *template_data) {
00537 char last_state='C';
00538 int cur_core=0;
00539
00540
00541 for (int i=0; i < template_data->len; i++) {
00542 if (last_state != 'C' && last_state != template_data->residue[i].SS) {
00543 if ( cur_core > 0)
00544 template_data->cores[cur_core-1].len = i - template_data->cores[cur_core-1].head;
00545 }
00546 if ( (template_data->residue[i].SS == 'H' || template_data->residue[i].SS == 'E') &&
00547 last_state != template_data->residue[i].SS) {
00548 template_data->cores[cur_core].head=i;
00549 cur_core++;
00550 }
00551 last_state = template_data->residue[i].SS;
00552 }
00553 if (last_state != 'C') {
00554 template_data->cores[cur_core-1].len = template_data->len - template_data->cores[cur_core-1].head;
00555 }
00556 template_data->core_count=cur_core;
00557
00558
00559
00560
00561 for (int i = 0; i < template_data->core_count; i++) {
00562 #ifdef DEBUG
00563 fprintf(stderr, "Core %d (%c): %d\n", i, template_data->residue[ template_data->cores[i].head ].SS, template_data->cores[i].len );
00564 #endif
00565 if ( template_data->residue[ template_data->cores[i].head ].SS == 'H' && template_data->cores[i].len < cMinHelix ) {
00566 for (int j = i; j < template_data->core_count-1; j++) {
00567 template_data->cores[j] = template_data->cores[j+1];
00568 }
00569 template_data->core_count--;
00570 i--;
00571 } else if ( template_data->residue[ template_data->cores[i].head ].SS == 'E' && template_data->cores[i].len < cMinStrand ) {
00572 for (int j = i; j < template_data->core_count-1; j++) {
00573 template_data->cores[j] = template_data->cores[j+1];
00574 }
00575 template_data->core_count--;
00576 i--;
00577 }
00578 }
00579
00580 #if 1
00581
00582
00583 int core_residue_count = 0, core_interaction_count = 0;
00584 for (int i = 0; i < template_data->core_count; i++) {
00585 core_residue_count += template_data->cores[i].len;
00586 }
00587 for (int i = 0; i < template_data->core_count; i++) {
00588 for (int j = 0; j < template_data->core_count; j++) {
00589 if ( i != j) {
00590 for (int k= 0; k < template_data->twobody_count; k++) {
00591 if ( template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[i].head &&
00592 template_data->twobody_interactions[k].residue_number[0] < template_data->cores[i].head + template_data->cores[i].len &&
00593 template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[j].head &&
00594 template_data->twobody_interactions[k].residue_number[1] < template_data->cores[j].head + template_data->cores[i].len ) {
00595 core_interaction_count++;
00596 }
00597 }
00598 }
00599 }
00600 }
00601 float core_interaction_ratio = (float)core_interaction_count / (float) core_residue_count;
00602 #ifdef DEBUG
00603 fprintf(stderr, "protein core interaction ratio: %f\n", core_interaction_ratio);
00604 #endif
00605
00606 for (int i = 0; i < template_data->core_count; i++) {
00607 long cur_interaction_count = 0;
00608 #ifdef DEBUG
00609 fprintf(stderr, "core %d\n", i);
00610 #endif
00611 for (int j = 0; j < template_data->core_count; j++) {
00612 if ( i != j) {
00613 for (int k= 0; k < template_data->twobody_count; k++) {
00614 if (( template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[i].head &&
00615 template_data->twobody_interactions[k].residue_number[0] < template_data->cores[i].head + template_data->cores[i].len &&
00616 template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[j].head &&
00617 template_data->twobody_interactions[k].residue_number[1] < template_data->cores[j].head + template_data->cores[i].len) ||
00618 ( template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[i].head &&
00619 template_data->twobody_interactions[k].residue_number[1] < template_data->cores[i].head + template_data->cores[i].len &&
00620 template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[j].head &&
00621 template_data->twobody_interactions[k].residue_number[0] < template_data->cores[j].head + template_data->cores[i].len)
00622
00623 ) {
00624 cur_interaction_count++;
00625 #ifdef DEBUG
00626 fprintf(stderr, "\t%d -- %d\n", template_data->twobody_interactions[k].residue_number[0],
00627 template_data->twobody_interactions[k].residue_number[1] );
00628 #endif
00629 }
00630 }
00631 }
00632 }
00633 #ifdef DEBUG
00634 fprintf(stderr, "\tinteraction ratio %f\n", ( (float) cur_interaction_count / (float) template_data->cores[i].len ) );
00635 #endif
00636 if ( 0.3 * core_interaction_ratio > ( (float) cur_interaction_count / (float) template_data->cores[i].len ) ) {
00637
00638 for (int j = i; j < template_data->core_count-1; j++) {
00639 template_data->cores[j] = template_data->cores[j+1];
00640 }
00641 template_data->core_count--;
00642 i--;
00643 }
00644 }
00645
00646
00647
00648 for (int i = 0; i < template_data->core_count; i++) {
00649 long cur_interaction_count = 0;
00650 long residue_interaction_count[ template_data->cores[i].len ];
00651 for (int j = 0; j < template_data->cores[i].len; j++) {
00652 residue_interaction_count[j] = 0;
00653 }
00654 for (int j = 0; j < template_data->core_count; j++) {
00655 if ( i != j) {
00656 for (int k= 0; k < template_data->twobody_count; k++) {
00657 if ( template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[i].head &&
00658 template_data->twobody_interactions[k].residue_number[0] < template_data->cores[i].head + template_data->cores[i].len &&
00659 template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[j].head &&
00660 template_data->twobody_interactions[k].residue_number[1] < template_data->cores[j].head + template_data->cores[j].len
00661 ) {
00662 cur_interaction_count++;
00663 residue_interaction_count[ template_data->twobody_interactions[k].residue_number[0] - template_data->cores[i].head ]++;
00664 } else if (template_data->twobody_interactions[k].residue_number[1] >= template_data->cores[i].head &&
00665 template_data->twobody_interactions[k].residue_number[1] < template_data->cores[i].head + template_data->cores[i].len &&
00666 template_data->twobody_interactions[k].residue_number[0] >= template_data->cores[j].head &&
00667 template_data->twobody_interactions[k].residue_number[0] < template_data->cores[j].head + template_data->cores[j].len
00668 ) {
00669 cur_interaction_count++;
00670 residue_interaction_count[ template_data->twobody_interactions[k].residue_number[1] - template_data->cores[i].head ]++;
00671 }
00672 }
00673 }
00674 }
00675
00676 #ifdef DEBUG
00677
00678 for (int k = 0; k < template_data->cores[i].len; k++) {
00679 fprintf(stderr, "%d ", residue_interaction_count[k]);
00680 }
00681 #endif
00682 char removed = 0;
00683 long total_removed = 0, new_head = 0, new_len = template_data->cores[i].len;
00684 do {
00685 removed = 0;
00686 if ( residue_interaction_count[ new_head ] < residue_interaction_count[ new_len-1 ] ) {
00687 if ( total_removed + residue_interaction_count[new_head] < 0.1 * (float) cur_interaction_count ) {
00688 total_removed += residue_interaction_count[new_head];
00689 new_head++;
00690 removed = 1;
00691 }
00692 } else {
00693 if ( total_removed + residue_interaction_count[new_len-1] < 0.1 * (float) cur_interaction_count ) {
00694 total_removed += residue_interaction_count[new_len-1];
00695 new_len--;
00696 removed = 1;
00697 }
00698 }
00699 } while (removed);
00700 #ifdef DEBUG
00701 fprintf(stderr, "Core %d (%c) head %d (%d) len: %d (%d )\n", i, template_data->residue[ template_data->cores[i].head ].SS,
00702 template_data->cores[i].head + new_head, template_data->cores[i].head,
00703 new_len - new_head, template_data->cores[i].len );
00704 #endif
00705 template_data->cores[i].head += new_head;
00706 template_data->cores[i].len = new_len - new_head;
00707
00708 }
00709
00710
00711 for (int i = 0; i < template_data->core_count; i++) {
00712 if ( template_data->residue[ template_data->cores[i].head ].SS == 'H' && template_data->cores[i].len < cMinHelix-1 ) {
00713 for (int j = i; j < template_data->core_count-1; j++) {
00714 template_data->cores[j] = template_data->cores[j+1];
00715 }
00716 template_data->core_count--;
00717 i--;
00718 } else if ( template_data->residue[ template_data->cores[i].head ].SS == 'E' && template_data->cores[i].len < cMinStrand-1 ) {
00719 for (int j = i; j < template_data->core_count-1; j++) {
00720 template_data->cores[j] = template_data->cores[j+1];
00721 }
00722 template_data->core_count--;
00723 i--;
00724 }
00725 }
00726
00727 #endif
00728
00729
00730
00731
00732
00733 for (int i = 0; i < template_data->len; i++) {
00734 template_data->t_residue_info[i].core_num = -1;
00735 }
00736 for (int i = 0; i < template_data->core_count; i++) {
00737 for (int j = 0; j < template_data->cores[i].len; j++) {
00738 template_data->t_residue_info[ template_data->cores[i].head + j ].core_num = i;
00739 }
00740 }
00741
00742
00743 for (int i = 0; i < template_data->core_count; i++) {
00744 for (int j = 0; j < template_data->twobody_count; j++) {
00745 for (int k = 0; k < 2; k++) {
00746 if ( template_data->twobody_interactions[j].residue_number[k] >= template_data->cores[i].head &&
00747 template_data->twobody_interactions[j].residue_number[k] < template_data->cores[i].head + template_data->cores[i].len ) {
00748 template_data->twobody_interactions[j].core_number[k] = i;
00749 }
00750 }
00751 }
00752 }
00753 }
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782 AlignFeatures::AlignFeatures(void) {
00783 for ( int i =0; i < f_count; i++ ) {
00784 is_set[i] = 0;
00785 }
00786 }
00787
00788
00789 void AlignFeatures::CalcFeatures( class TemplateStruct *template_data,
00790 class TargetStruct *target_data,
00791 class AlignmentStruct *align ) {
00792
00793
00794
00795 feature[ fn_align ] = 0;
00796 feature[ fn_ident ] = 0;
00797 for (int i = 0; i < target_data->len; i++) {
00798 if (align->GetTargetAlign( i )!= -1 ) {
00799 feature[ fn_align ]++;
00800 if ( target_data->residue[i].RS == template_data->residue[ align->GetTargetAlign( i ) ].RS ) {
00801 feature[fn_ident]++;
00802 }
00803 }
00804 }
00805 feature[ fr_align ] = feature[fn_align] / (float)target_data->len;
00806 feature[ fr_ident ] = feature[fn_ident] / (float)target_data->len;
00807 is_set[ fr_align ] = 1;
00808 is_set[ fn_align ] = 1;
00809 is_set[ fr_ident ] = 1;
00810 is_set[ fn_ident ] = 1;
00811
00812
00813 feature[ fn_align_pair ] = 0;
00814 long pair_total = 0;
00815 for (int i = 0; i < template_data->twobody_count; i++) {
00816 if ( template_data->twobody_interactions[i].core_number[0] != -1 &&
00817 template_data->twobody_interactions[i].core_number[1] != -1 ) {
00818 pair_total++;
00819 if (align->GetTemplateAlign( template_data->twobody_interactions[i].residue_number[0] ) != -1 &&
00820 align->GetTemplateAlign( template_data->twobody_interactions[i].residue_number[1] ) != -1 ) {
00821 feature[ fn_align_pair ]++;
00822 }
00823 }
00824 }
00825 feature[ fr_align_pair ] = feature[ fn_align_pair ] / (double) pair_total;
00826 is_set[ fr_align_pair ] = 1;
00827 is_set[ fn_align_pair ] = 1;
00828
00829
00830
00831 feature[ fn_align_core_res ] = 0;
00832 feature[ fn_align_core ] = 0;
00833 for (int j = 0; j < template_data->core_count; j++) {
00834 if ( align->GetTemplateAlign( template_data->cores[j].head ) != -1 ) {
00835 feature[ fn_align_core ]++;
00836 }
00837 }
00838 for (int j = 0; j < template_data->core_count; j++) {
00839 feature[ fn_align_core_res ] += template_data->cores[j].len;
00840 }
00841 is_set[ fn_align_core_res ] = 1;
00842 is_set[ fn_align_core ] = 1;
00843
00844
00845
00846
00847
00848 feature[ fn_contact ] = 0;
00849 feature[ fn_halfcontact ] = 0;
00850 int total_contact = 0;
00851 for ( int i = 0; i < template_data->len; i++ ) {
00852 ResidueAtoms *res_1 = &template_data->structure[i];
00853 int align_1 = align->GetTemplateAlign(i);
00854 for ( int j = i+1; j < template_data->len; j++ ) {
00855 ResidueAtoms *res_2 = &template_data->structure[j];
00856 int align_2 = align->GetTemplateAlign(j);
00857 float dist = smDistance( res_1->atom[ res_1->bb_num[ anCa ] ],
00858 res_2->atom[ res_2->bb_num[ anCa ] ] );
00859 if ( dist < 7.6 ) {
00860 total_contact++;
00861 if ( (align_1 == -1 && align_2 != -1) || (align_1 != -1 && align_2 == -1) ) {
00862 feature[ fn_halfcontact ]++;
00863 }
00864 if ( (align_1 != -1 && align_2 != -1) ) {
00865 feature[ fn_contact ]++;
00866 }
00867 }
00868 }
00869 }
00870 feature[ fr_contact ] = feature[ fn_contact ] / (float) total_contact;
00871 feature[ fr_halfcontact ] = feature[ fn_halfcontact ] / (float) total_contact;
00872 is_set[ fr_contact ] = 1;
00873 is_set[ fn_contact ] = 1;
00874 is_set[ fr_halfcontact ] = 1;
00875 is_set[ fn_halfcontact ] = 1;
00876 }
00877
00878