00001
00002 #include <math.h>
00003 #include <stdio.h>
00004 #include <stdlib.h>
00005 #include <string.h>
00006 #include <ctype.h>
00007 #include <assert.h>
00008 #include <string>
00009
00010
00011
00012 #include <libxml/tree.h>
00013 #include <libxml/parser.h>
00014 #include <libxml/xpath.h>
00015 #include <libxml/xpathInternals.h>
00016
00017 #include "open_prospect.h"
00018 #include "sm_matrix.h"
00019
00020
00021 char *get_first_child_content( xmlNodePtr parent, char *str);
00022
00023
00024
00025 void append_to_str(char **str1, char *str2);
00026
00027
00028 int rot_mask_active(RotamerMask *mask, int i);
00029
00030
00031
00032 TargetStruct::TargetStruct(void) {
00033 this->len = 0;
00034 this->residue_offset = 0;
00035 this->twobody = NULL;
00036 this->hbond = NULL;
00037 this->bsheet = NULL;
00038 this->distconst = NULL;
00039 this->distconst_count = 0;
00040 this->residue = NULL;
00041 this->structure = NULL;
00042 }
00043
00044 TargetStruct::~TargetStruct() {
00045 FreeMem();
00046 }
00047
00048 void TargetStruct::FreeMem(void) {
00049 if (this->residue != NULL)
00050 delete this->residue;
00051 this->residue = NULL;
00052 if (this->structure != NULL)
00053 delete this->structure;
00054 this->structure = NULL;
00055
00056 if(this->twobody != NULL) {
00057 free(this->twobody[0]);
00058 free(this->twobody);
00059 }
00060 twobody = NULL;
00061 if (this->distconst != NULL) {
00062 free(this->distconst);
00063 }
00064 distconst = NULL;
00065 if (this->hbond != NULL) {
00066 free( this->hbond[0] );
00067 free( this->hbond );
00068 }
00069 hbond = NULL;
00070 if (this->bsheet != NULL) {
00071 free( this->bsheet[0] );
00072 free( this->bsheet );
00073 }
00074 bsheet = NULL;
00075 len = 0;
00076 }
00077
00078
00079 TargetStruct::TargetStruct(const TargetStruct &rhs) {
00080 len = 0;
00081 twobody = NULL;
00082 hbond = NULL;
00083 distconst = NULL;
00084 bsheet = NULL;
00085 distconst_count = 0;
00086 residue = NULL;
00087 residue_offset = rhs.residue_offset;
00088
00089 if ( rhs.len > 0 && rhs.residue != NULL) {
00090 len = rhs.len;
00091 residue = new TargetResidue[len];
00092 for (int i = 0; i < len; i++) {
00093 residue[i] = rhs.residue[i];
00094 }
00095 if ( rhs.twobody ) {
00096 twobody = (pValType **)malloc(sizeof(pValType *) * len);
00097 twobody[0] = (pValType *)malloc(sizeof(pValType *) * len * len );
00098 for (int i = 0; i < len; i++) {
00099 twobody[i] = twobody[0] + i * len;
00100 for (int j = 0; j < len; j++)
00101 twobody[i][j] = rhs.twobody[i][j];
00102 }
00103 }
00104 if ( rhs.hbond ) {
00105 hbond = (pValType **)malloc(sizeof(pValType *) * len);
00106 hbond[0] = (pValType *)malloc(sizeof(pValType *) * len * len );
00107 for (int i = 0; i < len; i++) {
00108 hbond[i] = hbond[0] + i * len;
00109 for (int j = 0; j < len; j++)
00110 hbond[i][j] = rhs.hbond[i][j];
00111 }
00112 }
00113 if ( rhs.bsheet ) {
00114 bsheet = (char **)malloc(sizeof(char *) * len);
00115 bsheet[0] = (char *)malloc(sizeof(char *) * len * len );
00116 for (int i = 0; i < len; i++) {
00117 bsheet[i] = bsheet[0] + i * len;
00118 for (int j = 0; j < len; j++)
00119 bsheet[i][j] = rhs.bsheet[i][j];
00120 }
00121 }
00122 }
00123 if ( rhs.distconst && rhs.distconst_count > 0) {
00124 distconst_count = rhs.distconst_count;
00125 distconst = (TargetDistConstraint *)malloc(sizeof(TargetDistConstraint) * distconst_count);
00126 for (int i = 0; i < distconst_count; i++) {
00127 distconst[i] = rhs.distconst[i];
00128 }
00129 }
00130 }
00131
00132 void TargetStruct::SetLen(int new_len) {
00133 if ( new_len != len ) {
00134 FreeMem();
00135 len = new_len;
00136 residue = new TargetResidue[len];
00137 structure = new ResidueAtoms[ len ];
00138 }
00139 };
00140
00141 int TargetStruct::Setup( ProspectParam *param ) {
00142 int i, j;
00143 pValType sum;
00144
00145 if (this->residue == NULL)
00146 return 1;
00147
00148 for (i = 0; i < this->len; i++) {
00149 sum = 0;
00150 for (j = 0; j < 20; j++)
00151 sum += this->residue[i].FREQ[j];
00152 if (sum < 0.000001) {
00153 this->residue[i].FREQ[ AA1toAANum( this->residue[i].RS ) ] = 1;
00154 }
00155 }
00156 this->twobody = (pValType **)malloc( sizeof(pValType *) * this->len );
00157 this->twobody[0] = (pValType *)malloc( sizeof(pValType) * this->len * this->len );
00158 for (i = 1; i < this->len; i++) {
00159 this->twobody[i] = this->twobody[0] + i*this->len;
00160 }
00161 for (i = 0; i < this->len; i++) {
00162 for (j = 0; j <= i; j++) {
00163 this->twobody[i][j] = calc_twobody_val(param, this,i,j);
00164 this->twobody[j][i] = this->twobody[i][j];
00165 }
00166 }
00167 if ( structure ) {
00168 for (i = 0; i < this->len; i++) {
00169 for (int j = 0; j < this->structure[i].atom_count; j++) {
00170
00171
00172
00173
00174
00175
00176
00177 this->structure[i].radius[j] =
00178 param->atom_data->FindRadius( this->structure[i].atom_name[j] );
00179 }
00180 }
00181 }
00182 return 0;
00183 }
00184
00185 int TargetStruct::SetSequence(char *seq) {
00186 SetLen( strlen( seq ) );
00187 for (int i = 0; i < this->len; i++) {
00188 this->residue[i].RS = toupper(seq[i]);
00189 for (int j = 0; j < 21; j++)
00190 this->residue[i].FREQ[j] = 0;
00191 this->residue[i].FREQ[ AA1toAANum(this->residue[i].RS) ] = 1;
00192 }
00193 this->twobody = NULL;
00194 this->distconst_count = 0;
00195 return 0;
00196 }
00197
00198
00199 void TargetStruct::SetChain(char new_chain) {
00200 for ( int i = 0; i < len; i++) {
00201 residue[i].chain = new_chain;
00202 if ( structure ) {
00203 structure[i].chain = new_chain;
00204 }
00205 }
00206 }
00207
00208
00209
00210
00211
00212
00213 void TargetStruct::ReadSingleDelStr( char *data ) {
00214 char *buffer = strdup(data) , RS, *cur_ptr;
00215 float tmp_float;
00216 int i = 0;
00217 cur_ptr = strtok( data, "\n\r" );
00218 do {
00219 sscanf( cur_ptr, "%c %f", &RS, &tmp_float );
00220 if ( i < len )
00221 residue[i].single_del = tmp_float;
00222 i++;
00223 } while ( (cur_ptr = strtok( NULL, "\n\r") ) );
00224 free(buffer);
00225 }
00226
00227 void TargetStruct::ReadSingleInsStr( char *data ) {
00228 char *buffer = strdup(data) , RS, *cur_ptr;
00229 float tmp_float;
00230 int i = 0;
00231 cur_ptr = strtok( data, "\n\r" );
00232 do {
00233 sscanf( cur_ptr, "%c %f", &RS, &tmp_float );
00234 if ( i < len )
00235 residue[i].single_ins = tmp_float;
00236 i++;
00237 } while ( (cur_ptr = strtok( NULL, "\n\r") ) );
00238 free(buffer);
00239 }
00240
00241
00242 int TargetStruct::LoadXML(char *xml_path) {
00243 char *read_buffer;
00244 xmlDocPtr doc;
00245 xmlNodePtr root;
00246
00247
00248 doc = xmlParseFile(xml_path);
00249 if (doc == NULL)
00250 return 1;
00251
00252 root = xmlDocGetRootElement(doc);
00253 if (root == NULL)
00254 return 1;
00255
00256 read_buffer = get_first_child_content(root, "ssp");
00257 if ( read_buffer ) {
00258 LoadSSPStr( read_buffer );
00259 free(read_buffer);
00260 }
00261 read_buffer = get_first_child_content(root, "singledel");
00262 if ( read_buffer ) {
00263 ReadSingleDelStr( read_buffer );
00264 free(read_buffer);
00265 }
00266 read_buffer = get_first_child_content(root, "singleins");
00267 if ( read_buffer ) {
00268 ReadSingleInsStr( read_buffer );
00269 free(read_buffer);
00270 }
00271 return 0;
00272 }
00273
00274
00275 int TargetStruct::LoadSSP(char *ssp_path) {
00276 char buffer[2000];
00277 FILE *ssp_file;
00278 ssp_file = fopen(ssp_path, "r");
00279 if (ssp_file == NULL) {
00280 return 1;
00281 }
00282 std::string my_string;
00283 while (fgets(buffer, sizeof(buffer), ssp_file) ) {
00284 my_string += buffer;
00285 }
00286 fclose(ssp_file);
00287 return LoadSSPStr( (char *)my_string.c_str() );
00288 }
00289
00290
00291 int TargetStruct::LoadSSPStr(char *ssp_text) {
00292 int i,j,num;
00293 char dummyc;
00294 char *buffer= strdup( ssp_text );
00295 TargetResidue *tmp_residue;
00296 float sum;
00297 float ss_prob[20];
00298
00299 this->distconst = NULL;
00300 tmp_residue = new TargetResidue[5000];
00301
00302 i=0;
00303
00304 char *cur_ptr = strtok( buffer, "\n\r" );
00305 while ( cur_ptr ) {
00306 j = sscanf(cur_ptr, "%d %c %c %f %f %f %c %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f",
00307 &num,
00308 &tmp_residue[i].RS,
00309 &tmp_residue[i].SS,
00310 &tmp_residue[i].SS_PROB[LOOP],
00311 &tmp_residue[i].SS_PROB[SHEET],
00312 &tmp_residue[i].SS_PROB[HELIX],
00313 &dummyc,
00314 &ss_prob[0],
00315 &ss_prob[1],
00316 &ss_prob[2],
00317 &ss_prob[3],
00318 &ss_prob[4],
00319 &ss_prob[5],
00320 &ss_prob[6],
00321 &ss_prob[7],
00322 &ss_prob[8],
00323 &ss_prob[9],
00324 &ss_prob[10],
00325 &ss_prob[11],
00326 &ss_prob[12],
00327 &ss_prob[13],
00328 &ss_prob[14],
00329 &ss_prob[15],
00330 &ss_prob[16],
00331 &ss_prob[17],
00332 &ss_prob[18],
00333 &ss_prob[19] );
00334 if (j != 27) {
00335 return 1;
00336 }
00337 sum = 0.0;
00338 for (j = 0; j < 20; j++) {
00339 tmp_residue[i].FREQ[j] = ss_prob[j];
00340 sum += ss_prob[j];
00341 }
00342
00343 tmp_residue[i].FREQ[20] = 0;
00344 if (sum > 0.0) {
00345 for (j=0;j<20;j++) {
00346 tmp_residue[i].FREQ[j] = tmp_residue[i].FREQ[j]/sum;
00347 assert(!isnan(tmp_residue[i].FREQ[j]));
00348 }
00349 } else {
00350
00351 }
00352 tmp_residue[i].orig_num = i;
00353 i++;
00354 cur_ptr = strtok( NULL, "\n\r" );
00355 }
00356 if ( i == 0 ) {
00357 delete tmp_residue;
00358 return 1;
00359 }
00360 SetLen( i );
00361 for (i = 0; i < len; i++) {
00362 this->residue[i] = tmp_residue[i];
00363 }
00364 delete tmp_residue;
00365 this->distconst_count = 0;
00366 return 0;
00367 }
00368
00369
00370 int TargetStruct::LoadConstraint(char *constraint_file) {
00371 FILE *file;
00372 char buffer[2048];
00373 int const_count = 0, const_size = 10, tmp;
00374 TargetDistConstraint *constraint = (TargetDistConstraint *)malloc(sizeof(TargetDistConstraint) * const_size);
00375
00376 file = fopen(constraint_file, "r");
00377
00378 while (fgets(buffer, sizeof(buffer), file)) {
00379 if (const_count >= const_size) {
00380 const_size += 10;
00381 constraint = (TargetDistConstraint *)realloc(constraint, sizeof(TargetDistConstraint) * const_size);
00382 }
00383 sscanf(buffer, "%d %d %f %f", &constraint[const_count].aa[0],
00384 &constraint[const_count].aa[1],
00385 &constraint[const_count].dist, &constraint[const_count].var);
00386
00387 if ( constraint[const_count].aa[0] > constraint[const_count].aa[1] ) {
00388 tmp = constraint[const_count].aa[0];
00389 constraint[const_count].aa[0] = constraint[const_count].aa[1];
00390 constraint[const_count].aa[1] = tmp;
00391 }
00392 const_count++;
00393 }
00394 fclose(file);
00395
00396 this->distconst = constraint;
00397 this->distconst_count = const_count;
00398 return 0;
00399 }
00400
00401
00402
00403
00404
00405 int TargetStruct::LoadPsiPred(char *psipred_path) {
00406 int i,j,num;
00407 char dummyc;
00408 FILE *psipred_file;
00409 i=0;
00410 TargetResidue *tmp_residue;
00411
00412 FreeMem();
00413
00414 psipred_file = fopen(psipred_path, "r");
00415 if (psipred_file == NULL) {
00416 return 1;
00417 }
00418
00419 this->distconst = NULL;
00420 tmp_residue = new TargetResidue[5000];
00421 while (!feof(psipred_file)) {
00422 fscanf(psipred_file,"%d%c%c%c%c%f%f%f",&num,&dummyc,
00423 &tmp_residue[i].RS,
00424 &dummyc,
00425 &tmp_residue[i].SS,
00426 &tmp_residue[i].SS_PROB[LOOP],
00427 &tmp_residue[i].SS_PROB[HELIX],
00428 &tmp_residue[i].SS_PROB[SHEET]);
00429 for (j=0; j < 21;j++) {
00430 tmp_residue[i].FREQ[j] = 0;
00431 }
00432 tmp_residue[i].FREQ[ AA1toAANum( tmp_residue[i].RS ) ] = 1;
00433 tmp_residue[i].orig_num = i;
00434 i++;
00435 }
00436 fclose(psipred_file);
00437 SetLen( num );
00438 for (i = 0; i < num; i++) {
00439 this->residue[i] = tmp_residue[i];
00440 }
00441 delete tmp_residue;
00442 this->distconst_count = 0;
00443 return 0;
00444 }
00445
00446
00447
00448
00449 int TargetStruct::LoadFasta(char *SeqPath) {
00450 FILE *fd;
00451 short i, j = 0;
00452 char s[1024];
00453 int QuerySeqSize = 0;
00454
00455 FreeMem();
00456
00457 if (!(fd = fopen (SeqPath, "r"))) {
00458 fprintf (stderr, "INPUT %s OPEN ERROR for %s:%d QUIT\n", SeqPath, __FILE__, __LINE__ );
00459 return 1;
00460 }
00461
00462 while (fgets (s, sizeof(s), fd)) {
00463 if (strncmp (s, "REM ", 4) != 0 && strncmp (s, ">", 1) != 0) {
00464 for (i = 0; s[i] != '\0'; i++) {
00465 if (s[i] != '\n' && s[i] != '\t' && s[i] != ' ' && s[i] != 13 && s[i] != 26) {
00466 QuerySeqSize++;
00467 }
00468 }
00469 }
00470 }
00471 rewind(fd);
00472
00473 j = 0;
00474 SetLen( QuerySeqSize );
00475 while (fgets (s, sizeof(s), fd)) {
00476 if (strncmp (s, "REM ", 4) != 0 && strncmp (s, ">", 1) != 0) {
00477 for (i = 0; s[i] != '\0'; i++) {
00478 if (s[i] != '\n' && s[i] != '\t' && s[i] != ' ' && s[i] != 13 && s[i] != 26) {
00479
00480 this->residue[j].RS = toupper(s[i]); j++;
00481 for (int k = 0; k < 21; k++)
00482 this->residue[j].FREQ[k] = 0;
00483 this->residue[j].FREQ[ AA1toAANum(this->residue[j].RS) ] = 1;
00484 }
00485 }
00486 }
00487 }
00488 fclose (fd);
00489 this->twobody = NULL;
00490 this->distconst_count = 0;
00491 return 0;
00492 }
00493
00494
00495
00496
00497
00498
00499 int TargetStruct::LoadPHD( char *filename) {
00500 FILE *fd;
00501 short i, k = -1;
00502 char s[1024], *ptr;
00503 short QuerySeqSize = 0;
00504
00505
00506
00507
00508 if (!(fd = fopen (filename, "r"))) {
00509 fprintf (stderr, "INPUT %s OPEN ERROR: %s:%d\n", filename, __FILE__, __LINE__);
00510 return 1;
00511 }
00512
00513 QuerySeqSize = 0;
00514 while (fgets (s, sizeof(s), fd)) {
00515 if ( strstr(s, "AA") ) {
00516 ptr = strstr(s, "|");
00517 if (ptr ==NULL) {
00518 fprintf(stderr, "Invalid PHD file\n");
00519 return -1;
00520 }
00521 ptr++;
00522 for (i = 0; i < 60; i++) {
00523 if ( isalpha( ptr[i] ) )
00524 QuerySeqSize++;
00525 }
00526 }
00527 }
00528
00529 rewind(fd);
00530
00531
00532
00533 SetLen( QuerySeqSize );
00534
00535
00536 while (fgets (s, sizeof(s), fd)) {
00537 if ( strstr(s, "AA") ) {
00538 k++;
00539 ptr = strstr(s, "|");
00540 if (ptr == NULL) {
00541 return -1;
00542 }
00543 ptr++;
00544 for (i = 0; i < 60; i++) {
00545 if ( isalpha(ptr[i]) ) {
00546 this->residue[60*k+i].RS = toupper( ptr[i] );
00547 }
00548 }
00549 } else if (strstr (s, "prH") ) {
00550 ptr = strstr(s, "|");
00551 ptr++;
00552 for (i = 0; i < 60; i++) {
00553 if (isdigit(ptr[i]) && 60*k+i < QuerySeqSize)
00554 this->residue[60*k+i].SS_PROB[HELIX] = ((pValType)(ptr[i] - '0')) * .1;
00555 }
00556 } else if (strstr (s, "prE") ) {
00557 ptr = strstr(s, "|");
00558 ptr++;
00559 for (i = 0; i < 60; i++) {
00560 if (isdigit(ptr[i]) && 60*k+i < QuerySeqSize)
00561 this->residue[60*k+i].SS_PROB[SHEET] = ((pValType) (ptr[i] - '0')) * .1;
00562 }
00563 } else if (strstr (s, "prL" ) ) {
00564 ptr = strstr(s, "|");
00565 ptr++;
00566 for (i = 0; i < 60; i++) {
00567 if (isdigit(ptr[i]) && 60*k+i < QuerySeqSize)
00568 this->residue[60*k+i].SS_PROB[LOOP]= ((pValType) (ptr[i] - '0')) * .1;
00569 }
00570 }
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581 }
00582 fclose (fd);
00583
00584 this->distconst_count = 0;
00585
00586 return 0;
00587 }
00588
00589
00590
00591 int TargetStruct::LoadChk(char *ChkPath) {
00592 long i, j, len;
00593 char *qseq;
00594 double tmp_dbl[20];
00595 FILE *file = fopen(ChkPath, "rb");
00596
00597 fread( &len, sizeof(long), 1, file);
00598
00599 SetLen(len);
00600
00601 qseq = (char *)malloc(sizeof(char) * len);
00602 fread(qseq, 1, len, file);
00603
00604 for (i = 0; i < len; i++)
00605 this->residue[i].RS = qseq[i];
00606
00607 for (i = 0; i < len; i++) {
00608
00609 fread(tmp_dbl, sizeof(double), 20, file);
00610 for (j = 0; j < 20; j++)
00611 this->residue[i].FREQ[j] = tmp_dbl[j];
00612 this->residue[i].FREQ[20] = 0;
00613 }
00614 fclose(file);
00615
00616 this->distconst_count = 0;
00617
00618 return 0;
00619 }
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651 int TargetStruct::LoadFreq(char *FreqPath) {
00652 long i,j;
00653 int temp_int;
00654 char s[2048];
00655 float temp_freq[20];
00656 FILE *file = NULL;
00657
00658 if ( FreqPath == NULL ){
00659 fprintf(stderr,"NULL pointer FreqPath %s %d \n",__FILE__,__LINE__);
00660 return -1;
00661 }
00662 if ( (file = fopen(FreqPath, "r")) == NULL ){
00663 fprintf(stderr,"NULL pointer opening %s %s %d \n",FreqPath,__FILE__,__LINE__);
00664 return -1;
00665 }
00666
00667 do {
00668 if (!fgets(s, sizeof(s), file)) {
00669 fclose(file);
00670 return -1;
00671 }
00672 } while ( 1 != sscanf(s, "%d", &temp_int));
00673 if (temp_int == 0) {
00674 fprintf(stderr,"Invalid Freqfile\n");
00675 fclose(file);
00676 return -1;
00677 }
00678
00679 SetLen( temp_int );
00680
00681 fgets(s, sizeof(s), file);
00682 for (i = 0; i < this->len; i++)
00683 this->residue[i].RS = s[i];
00684
00685
00686 i = 0;
00687 while (fgets (s, sizeof(s), file) && i < this->len) {
00688 sscanf( s, "%f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f",
00689 &temp_freq[0], &temp_freq[1], &temp_freq[2], &temp_freq[3], &temp_freq[4],
00690 &temp_freq[5], &temp_freq[6], &temp_freq[7], &temp_freq[8], &temp_freq[9],
00691 &temp_freq[10], &temp_freq[11], &temp_freq[12], &temp_freq[13], &temp_freq[14],
00692 &temp_freq[15], &temp_freq[16], &temp_freq[17], &temp_freq[18], &temp_freq[19] );
00693 for (j = 0; j < 20; j++) {
00694 this->residue[i].FREQ[j] = temp_freq[j];
00695 }
00696 this->residue[i].FREQ[20] = 0;
00697 i++;
00698 }
00699
00700 this->distconst_count = 0;
00701
00702
00703 return 0;
00704 }
00705
00706
00707
00708 int TargetStruct::Copy(TargetStruct *rhs, int start, int stop) {
00709 if ( stop < 0)
00710 stop = rhs->len;
00711 if ( stop > rhs->len)
00712 stop = rhs->len;
00713 if (start < 0)
00714 start = 0;
00715
00716 SetLen( stop - start );
00717 for (int i = start; i < stop; i++) {
00718 this->residue[i-start] = rhs->residue[i];
00719 }
00720 for (int i = start; i < stop; i++) {
00721 this->structure[i-start] = rhs->structure[i];
00722 }
00723 return 0;
00724 }
00725
00726
00727
00728 int TargetStruct::Append(TargetStruct *rhs, int start, int stop) {
00729 if ( stop < 0)
00730 stop = rhs->len;
00731 if ( stop > rhs->len)
00732 stop = rhs->len;
00733 if (start < 0)
00734 start = 0;
00735 int orig_len = len;
00736 TargetStruct tmp_targ;
00737 tmp_targ.Copy( this );
00738 SetLen( orig_len + stop - start );
00739 for ( int i = 0; i < orig_len; i++ ) {
00740 this->residue[i] = tmp_targ.residue[i];
00741 this->structure[i] = tmp_targ.structure[i];
00742 }
00743 for (int i = start; i < stop; i++) {
00744 this->residue[orig_len + i-start] = rhs->residue[i];
00745 this->structure[orig_len + i-start] = rhs->structure[i];
00746 }
00747 return 0;
00748 }
00749
00750
00751
00752
00753 int TargetStruct::LoadPDB(char *filename, char chain) {
00754 FILE *file;
00755 if (NULL == (file = fopen( filename, "r"))) {
00756 return 1;
00757 }
00758
00759 FreeMem();
00760
00762 int tmp_len = 0;
00763 char cur_residue_name[20];
00764 char buffer[2000];
00765 strcpy(cur_residue_name, "");
00766 while (fgets(buffer, sizeof(buffer), file)) {
00767 char tmp_str[7];
00768 if ( ( !strncmp(buffer, "ATOM", 4) || !strncmp(buffer, "HETATM", 4)) && (chain == 0 || toupper(chain) == toupper(buffer[21]) ) ) {
00769 strncpy( tmp_str, &(buffer[17]), 3 );
00770 tmp_str[3] = 0;
00771 char cur_RS = AA3toAA1( tmp_str );
00772 if ( cur_RS != 'X' ) {
00773 strncpy( tmp_str, &(buffer[22]), 5 );
00774 tmp_str[5] = 0;
00775 if (strcmp( cur_residue_name, tmp_str ) ) {
00776 tmp_len++;
00777 strcpy(cur_residue_name, tmp_str);
00778 }
00779 }
00780 }
00781 if ( !strncmp(buffer, "ENDMDL", 6) ) {
00782 break;
00783 }
00784 }
00785 rewind(file);
00787
00788 SetLen( tmp_len );
00789
00790 for (int i = 0; i < len; i++) {
00791 this->structure[i].atom_count = 0;
00792 this->residue[i].RS = -1;
00793 }
00794
00795 int res_num = -1;
00796 strcpy(cur_residue_name, "");
00797 char tmp_x_str[30], tmp_y_str[30], tmp_z_str[30];
00798 strcpy(tmp_x_str, "");
00799 strcpy(tmp_y_str, "");
00800 strcpy(tmp_z_str, "");
00801 while (fgets(buffer, sizeof(buffer), file) ) {
00802 char tmp_str[50];
00803 if ( ( !strncmp(buffer, "ATOM", 4) || !strncmp(buffer, "HETATM", 4)) && (chain == 0 || toupper(chain) == toupper(buffer[21]) ) ) {
00804 strncpy( tmp_str, &(buffer[17]), 3 );
00805 tmp_str[3] = 0;
00806 char cur_RS = AA3toAA1( tmp_str );
00807 if ( cur_RS != 'X' ) {
00808 strncpy( tmp_str, &(buffer[22]), 5 );
00809 tmp_str[5] = 0;
00810 if (strcmp( cur_residue_name, tmp_str ) ) {
00811 res_num++;
00812 strcpy( cur_residue_name, tmp_str );
00813 }
00814 if (res_num >= 0 && this->structure[res_num].atom_count < kMaxResAtoms) {
00815 this->structure[ res_num ].ReadAtomLine( buffer );
00816 this->residue[res_num].RS = cur_RS;
00817 }
00818 }
00819 }
00820 if ( !strncmp(buffer, "ENDMDL", 6) ) {
00821 break;
00822 }
00823 }
00824 fclose(file);
00825
00826
00827
00828 return 0;
00829 }
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843 int TargetStruct::CopyBackBone( TemplateStruct *template_data,
00844 AlignmentStruct *align_data,
00845 int start, int stop) {
00846 if ( stop < 0 || stop > this->len )
00847 stop = this->len;
00848 if (start < 0)
00849 start = 0;
00850 for (int i = start; i < stop; i++) {
00851 int j = align_data->GetTargetAlign(i);
00852 if (j != -1) {
00853 this->structure[i] = template_data->structure[j];
00854 this->residue[i].psi = template_data->residue[j].psi;
00855 this->residue[i].phi = template_data->residue[j].phi;
00856 } else {
00857 this->structure[i].atom_count = 0;
00858 }
00859 }
00860 return 0;
00861 }
00862
00863
00864
00865
00866 int TargetStruct::SideChain_Init(ProspectParam *param) {
00867 for (int i = 0; i < this->len; i++) {
00868 if ( this->structure[i].atom_count >= 4 ) {
00869 int res_num = AA1toAANum( this->residue[i].RS);
00870 if (res_num >= 0 && res_num < 20) {
00871 ResidueInfoElement *res_info = ¶m->residue_info[ res_num ];
00872
00873 assert( param->residue_info[ res_num ].structure.atom_count < kMaxResAtoms );
00874 int k = this->structure[i].atom_count;
00875 assert( k < kMaxResAtoms );
00876
00877 for (int j = 0; j < res_info->structure.atom_count; j++) {
00878
00879
00880
00881
00882
00883
00884
00885
00886 this->structure[i].radius[k] = param->atom_data->FindRadius( this->structure[i].atom_name[k] );
00887 k++;
00888 }
00889 this->structure[i].atom_count = k;
00890 }
00891 }
00892 }
00893 return 0;
00894 }
00895
00896
00897 pValType calc_LRET(vec3 a, vec3 b, pValType ra, pValType rb) {
00898 pValType dist[3], d;
00899 pValType rad_dist = ra + rb;
00900 dist[0] = a[0] - b[0];
00901 dist[1] = a[1] - b[1];
00902 dist[2] = a[2] - b[2];
00903 dist[0] = dist[0] * dist[0];
00904 dist[1] = dist[1] * dist[1];
00905 dist[2] = dist[2] * dist[2];
00906 d = dist[0] + dist[1] + dist[2];
00907 if ( d > rad_dist * rad_dist)
00908 return 0.0;
00909 d = sqrt(d);
00910
00911 if ( d <= rad_dist*0.8254 )
00912 return 10;
00913 return 57.273 * (1 - d/rad_dist);
00914 }
00915
00916 pValType TargetStruct::Calc_LRET(ProspectParam *param) {
00917 pValType energy = 0.0;
00918
00919 for (int i = 0; i < this->len; i++) {
00920 for (int ia = 0; ia < this->structure[i].atom_count; ia++) {
00921 for (int ja = ia+1; ja < this->structure[i].atom_count; ja++) {
00922 energy += calc_LRET( this->structure[i].atom[ia], this->structure[i].atom[ja],
00923 this->structure[i].radius[ia], this->structure[i].radius[ja]
00924 );
00925 }
00926 for (int j = i+1; j < this->len; j++) {
00927 for (int ja = 0; ja < this->structure[j].atom_count; ja++) {
00928 energy += calc_LRET( this->structure[i].atom[ia], this->structure[j].atom[ja],
00929 this->structure[i].radius[ia], this->structure[j].radius[ja]
00930 );
00931 }
00932 }
00933 }
00934 }
00935 return energy;
00936 }
00937
00938
00939
00940
00941
00942 int TargetStruct::SideChain_Pack(ProspectParam *param) {
00943 RotamerMask rotamer_mask;
00944
00945 if (param->v_level > 0)
00946 fprintf(stderr, "SideChain: Calculating energy\n");
00947 Prospect_Target_RotamerMask_Init(param, this, &rotamer_mask);
00948 if (param->v_level > 0)
00949 fprintf(stderr, "SideChain: DEE\n");
00950 Prospect_RotamerMask_DEE(&rotamer_mask);
00951 if (param->v_level > 0)
00952 fprintf(stderr, "SideChain: DEE_Split\n");
00953 Prospect_RotamerMask_DEE_Split(&rotamer_mask);
00954
00955
00956
00957 if (param->v_level > 1) {
00958 fprintf(stderr, "Before tree decomp:\n");
00959 for (int i = 0; i < this->len; i++) {
00960 fprintf(stderr, "Residue %d '%c' (%d of %ld rotamers)\n",
00961 i, this->residue[i].RS,
00962 rot_mask_active(&rotamer_mask, i), rotamer_mask.count[i]);
00963 }
00964 }
00965 if (param->v_level > 0)
00966 fprintf(stderr, "SideChain: Tree decomposition\n");
00967 Prospect_RotamerMask_TreeDecomp(&rotamer_mask);
00968
00969
00970
00971
00972 for (int i = 0; i < this->len; i++) {
00973 if ( this->structure[i].atom_count >= 4) {
00974 int count = 0;
00975 int a = 0;
00976 for (int rot_a = 0; rot_a < rotamer_mask.count[i]; rot_a++) {
00977 if ( rotamer_mask.state[i][rot_a] ) {
00978 if (count == 0)
00979 a = rot_a;
00980 count++;
00981 }
00982 }
00983 if ( this->residue[i].RS == 'P' || this->residue[i].RS == 'G' || count == 0 ) {
00984 Prospect_Target_Rotamer(param, &this->structure[i], this->residue[i].RS, 0);
00985 } else {
00986 Prospect_Target_Rotamer(param, &this->structure[i], this->residue[i].RS, rotamer_mask.rot_num[i][a]);
00987 }
00988
00989
00990
00991
00992
00993
00994 }
00995 }
00996 Prospect_Target_RotamerMask_Free(&rotamer_mask);
00997 return 0;
00998 }
00999
01000
01001
01002 int TargetStruct::Count_Atoms(void) {
01003 if ( structure == NULL )
01004 return 0;
01005 int count = 0;
01006 for (int i = 0; i < this->len; i++) {
01007 count += this->structure[i].atom_count;
01008 }
01009 return count;
01010 }
01011
01012 void TargetStruct::ReNumberAtoms(int start) {
01013 if ( structure == NULL )
01014 return;
01015 int cur_atom = start;
01016 for ( int i = 0; i < len; i++) {
01017 for (int j = 0; j < structure[i].atom_count; j++) {
01018 structure[i].atom_num[j] = cur_atom;
01019 cur_atom++;
01020 }
01021 }
01022 }
01023
01024
01025 int TargetStruct::Count_SetResidues(void) {
01026 if ( structure == NULL )
01027 return 0;
01028 int count = 0;
01029 for (int i = 0; i < this->len; i++) {
01030 if ( this->structure[i].atom_count >= 4 )
01031 count++;
01032 }
01033 return count;
01034 }
01035
01036
01037 int TargetStruct::Count_UnSetResidues() {
01038 if ( structure == NULL )
01039 return len;
01040 int count = 0;
01041 for (int i = 0; i < this->len; i++) {
01042 if ( this->structure[i].atom_count < 4 )
01043 count++;
01044 }
01045 return count;
01046 }
01047
01048
01049
01050 vec3 * TargetStruct::GetCaLocArray(void) {
01051 vec3 *data_out = (vec3 *)malloc(sizeof(vec3) * len );
01052 for ( int i = 0; i < len; i++ ) {
01053 if ( structure[i].bb_num[ anCa ] != -1 ) {
01054 structure[i].GetBBLoc( anCa, data_out[i] );
01055 } else {
01056 data_out[i][0] = 0.0 / 0.0;
01057 data_out[i][1] = 0.0 / 0.0;
01058 data_out[i][2] = 0.0 / 0.0;
01059 }
01060 }
01061 return data_out;
01062 }
01063
01064
01065
01066 int TargetStruct::FindResIndexFromResNum( int res_num, char res_mod ) {
01067 if ( structure == NULL )
01068 return -1;
01069 for ( int i = 0; i < len; i++ ) {
01070 if ( res_num == structure[i].res_num && res_mod == structure[i].res_mod ) {
01071 return i;
01072 }
01073 }
01074 return -1;
01075 }
01076
01077
01078
01079
01080
01081
01082
01083 #define kDist_nh 1.0
01084
01085
01086 void TargetStruct::Calc_hloc(int residue, vec3 h_loc) {
01087 float oc_dist, nca_dist, nh_dist, cah_dist;
01088 long k;
01089
01090 #if 1
01091
01092 ResidueAtoms *residue_struct = &this->structure[ residue ];
01093
01094 oc_dist = sqrt( pow( residue_struct->atom[ (int)residue_struct->bb_num[ anC ] ][0] - residue_struct->atom[ (int)residue_struct->bb_num[ anO ] ][0], 2) +
01095 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anC ] ][1] - residue_struct->atom[ (int)residue_struct->bb_num[ anO ] ][1], 2) +
01096 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anC ] ][2] - residue_struct->atom[ (int)residue_struct->bb_num[ anO ] ][2], 2) );
01097
01098 for (k = 0; k < 3; k++) {
01099 h_loc[k] = residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][k] +
01100 (residue_struct->atom[ (int)residue_struct->bb_num[ anC ] ][k] - residue_struct->atom[ (int)residue_struct->bb_num[ anO ] ][k]) / oc_dist;
01101 }
01102
01103
01104 cah_dist = sqrt( pow( residue_struct->atom[ (int)residue_struct->bb_num[ anCa ] ][0] - h_loc[0], 2) +
01105 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anCa ] ][1] - h_loc[1], 2) +
01106 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anCa ] ][2] - h_loc[2], 2) );
01107
01108 nca_dist = sqrt( pow( residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][0] - residue_struct->atom[ (int)residue_struct->bb_num[ anCa ] ][0], 2) +
01109 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][1] - residue_struct->atom[ (int)residue_struct->bb_num[ anCa ] ][1], 2) +
01110 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][2] - residue_struct->atom[ (int)residue_struct->bb_num[ anCa ] ][2], 2) );
01111
01112
01113 nh_dist = sqrt( pow( residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][0] - h_loc[0], 2) +
01114 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][1] - h_loc[1], 2) +
01115 pow( residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][2] - h_loc[2], 2) );
01116
01117
01118 if ( 0 < (nca_dist*nca_dist + nh_dist * nh_dist - cah_dist * cah_dist) / (2*nca_dist*nh_dist) ) {
01119 for (k = 0; k < 3; k++) {
01120 h_loc[k] = residue_struct->atom[ (int)residue_struct->bb_num[ anN ] ][k] -
01121 (residue_struct->atom[ (int)residue_struct->bb_num[ anC ] ][k] - residue_struct->atom[ (int)residue_struct->bb_num[ anO ] ][k]) / oc_dist;
01122 }
01123 }
01124
01125 #else
01126 float nc_dist;
01127
01128 nc_dist = sqrt( pow( pdb->atoms[ residue.n ].loc[0] - pdb->atoms[ residue.c ].loc[0], 2) +
01129 pow( pdb->atoms[ residue.n ].loc[1] - pdb->atoms[ residue.c ].loc[1], 2) +
01130 pow( pdb->atoms[ residue.n ].loc[2] - pdb->atoms[ residue.c ].loc[2], 2) );
01131
01132 nca_dist = sqrt( pow( pdb->atoms[ residue.n ].loc[0] - pdb->atoms[ residue.ca ].loc[0], 2) +
01133 pow( pdb->atoms[ residue.n ].loc[1] - pdb->atoms[ residue.ca ].loc[1], 2) +
01134 pow( pdb->atoms[ residue.n ].loc[2] - pdb->atoms[ residue.ca ].loc[2], 2) );
01135
01136 for (k = 0; k < 3; k++) {
01137 h_loc[k] = pdb->atoms[ residue.n ].loc[k] -
01138 ( ( pdb->atoms[ residue.c ].loc[k] - pdb->atoms[ residue.n ].loc[k] ) / nc_dist +
01139 ( pdb->atoms[ residue.ca ].loc[k] - pdb->atoms[ residue.n ].loc[k] ) / nca_dist );
01140 }
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154 #endif
01155
01156 }
01157
01158
01159 #define q1 0.42
01160 #define q2 0.20
01161 #define f_val 332
01162
01163 float TargetStruct::Calc_hval(int i, int j) {
01164 vec3 h_loc;
01165 float on_dist, ch_dist, oh_dist, cn_dist, nh_dist;
01166
01167
01168
01169 Calc_hloc( j, h_loc);
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182 ResidueAtoms *residue_i = &this->structure[i],
01183 *residue_j = &this->structure[j];
01184
01185 on_dist = sqrt( pow( residue_i->atom[ (int)residue_i->bb_num[ anO ]][0] - residue_j->atom[ (int)residue_j->bb_num[ anN ]][0], 2) +
01186 pow( residue_i->atom[ (int)residue_i->bb_num[ anO ]][1] - residue_j->atom[ (int)residue_j->bb_num[ anN ]][1], 2) +
01187 pow( residue_i->atom[ (int)residue_i->bb_num[ anO ]][2] - residue_j->atom[ (int)residue_j->bb_num[ anN ]][2], 2) );
01188
01189 ch_dist = sqrt( pow( residue_i->atom[ (int)residue_i->bb_num[ anC ]][0] - h_loc[0], 2) +
01190 pow( residue_i->atom[ (int)residue_i->bb_num[ anC ]][1] - h_loc[1], 2) +
01191 pow( residue_i->atom[ (int)residue_i->bb_num[ anC ]][2] - h_loc[2], 2) );
01192
01193 oh_dist = sqrt( pow( residue_i->atom[ (int)residue_i->bb_num[ anO ]][0] - h_loc[0], 2) +
01194 pow( residue_i->atom[ (int)residue_i->bb_num[ anO ]][1] - h_loc[1], 2) +
01195 pow( residue_i->atom[ (int)residue_i->bb_num[ anO ]][2] - h_loc[2], 2) );
01196
01197
01198 cn_dist = sqrt( pow( residue_i->atom[ (int)residue_i->bb_num[ anC ]][0] - residue_j->atom[ (int)residue_j->bb_num[ anN ]][0], 2) +
01199 pow( residue_i->atom[ (int)residue_i->bb_num[ anC ]][1] - residue_j->atom[ (int)residue_j->bb_num[ anN ]][1], 2) +
01200 pow( residue_i->atom[ (int)residue_i->bb_num[ anC ]][2] - residue_j->atom[ (int)residue_j->bb_num[ anN ]][2], 2) );
01201
01202 nh_dist = sqrt( pow( residue_i->atom[ (int)residue_i->bb_num[ anN ]][0] - h_loc[0], 2) +
01203 pow( residue_i->atom[ (int)residue_i->bb_num[ anN ]][1] - h_loc[1], 2) +
01204 pow( residue_i->atom[ (int)residue_i->bb_num[ anN ]][2] - h_loc[2], 2) );
01205
01206 return q1 * q2 * ( 1 / on_dist + 1 / ch_dist - 1 / oh_dist - 1 / cn_dist) * f_val;
01207 }
01208
01209
01210
01211
01212 void TargetStruct::Setup_Hbonds( void ) {
01213 long i,j;
01214 float ca_ca_dist;
01215
01216
01217 this->hbond = (float **)malloc(sizeof(float*) * this->len );
01218 this->hbond[0] = (float *)malloc(sizeof(float) * this->len * this->len );
01219 for (i = 1; i < this->len; i++) {
01220 this->hbond[i] = this->hbond[0] + this->len * i;
01221 }
01222
01223 for (i = 0; i < this->len; i++) {
01224 for (j = 0; j < this->len; j++) {
01225 if ( abs(j-i) >= 3 ) {
01226 ResidueAtoms *residue_i = &this->structure[i],
01227 *residue_j = &this->structure[j];
01228
01229 ca_ca_dist = sqrt( pow( residue_i->atom[ (int)residue_i->bb_num[ anCa ] ][0] - residue_j->atom[ (int)residue_j->bb_num[ anCa ] ][0], 2) +
01230 pow( residue_i->atom[ (int)residue_i->bb_num[ anCa ] ][1] - residue_j->atom[ (int)residue_j->bb_num[ anCa ] ][1], 2) +
01231 pow( residue_i->atom[ (int)residue_i->bb_num[ anCa ] ][2] - residue_j->atom[ (int)residue_j->bb_num[ anCa ] ][2], 2) );
01232 if ( ca_ca_dist > 9) {
01233 this->hbond[ i ][ j ] = 10.0;
01234 } else {
01235 this->hbond[ i ][ j ] = Calc_hval(i, j);
01236 }
01237 } else {
01238 this->hbond[ i ][ j ] = 10.0;
01239 }
01240 }
01241 }
01242 }
01243
01244
01245 int TargetStruct::DSSP(void) {
01246 if ( hbond == NULL)
01247 Setup_Hbonds();
01248
01249
01250 char *dssp_array = (char *)malloc(len + 7);
01251 memset( dssp_array, 0, len + 7 );
01252
01253 #define dsHCutoff -0.5
01254 #define dsParallelBridge 0x01
01255 #define dsAntiParallelBridge 0x02
01256 #define ds3Helix 0x04
01257 #define ds4Helix 0x08
01258 #define ds5Helix 0x10
01259 #define ds3Turn 0x20
01260 #define ds4Turn 0x40
01261 #define ds5Turn 0x80
01262
01263 bsheet = (char **)malloc( sizeof( char * ) * len );
01264 bsheet[0] = (char *)malloc( len * len );
01265 for (int i = 0; i < len; i++) {
01266 bsheet[i] = bsheet[0] + len * i;
01267 for (int j = 0; j < len; j++) {
01268 bsheet[i][j] = 0;
01269 }
01270 }
01271
01272
01273 for (int i = 1; i < len-1; i++) {
01274 for (int j = 0; j < len; j++) {
01275 if ( hbond[i-1][j] < dsHCutoff && hbond[j][i+1] < dsHCutoff) {
01276 dssp_array[i] |= dsParallelBridge;
01277 dssp_array[j] |= dsParallelBridge;
01278 bsheet[i][j] = 1;
01279 bsheet[j][i] = 1;
01280 }
01281 }
01282 }
01283
01284
01285 for (int i = 0; i < len; i++) {
01286 for (int j = 0; j < len; j++) {
01287 if ( hbond[i][j] < dsHCutoff && hbond[j][i] < dsHCutoff) {
01288 dssp_array[i] |= dsAntiParallelBridge;
01289 dssp_array[j] |= dsAntiParallelBridge;
01290 bsheet[i][j] = 1;
01291 bsheet[j][i] = 1;
01292 }
01293 }
01294 }
01295
01296 for (int i = 1; i < len-1; i++) {
01297 for (int j = 1; j < len-1; j++) {
01298 if ( hbond[i-1][j+1] < dsHCutoff && hbond[j-1][i+1] < dsHCutoff) {
01299 dssp_array[i] |= dsAntiParallelBridge;
01300 dssp_array[j] |= dsAntiParallelBridge;
01301 bsheet[i][j] = 1;
01302 bsheet[j][i] = 1;
01303 }
01304 }
01305 }
01306
01307 for (int i = 0; i < len-3; i++) {
01308 if (hbond[i][i+3] < dsHCutoff) {
01309 dssp_array[i] |= ds3Turn;
01310 }
01311 }
01312
01313 for (int i = 0; i < len-4; i++) {
01314 if (hbond[i][i+4] < dsHCutoff) {
01315 dssp_array[i] |= ds4Turn;
01316 }
01317 }
01318
01319 for (int i = 0; i < len-5; i++) {
01320 if (hbond[i][i+5] < dsHCutoff) {
01321 dssp_array[i] |= ds5Turn;
01322 }
01323 }
01324
01325 for (int i = 1; i < len ; i++) {
01326 if ( dssp_array[i-1] & ds3Turn && dssp_array[i] & ds3Turn ) {
01327 dssp_array[i] |= ds3Helix;
01328 dssp_array[i+1] |= ds3Helix;
01329 dssp_array[i+2] |= ds3Helix;
01330 }
01331 }
01332
01333 for (int i = 1; i < len ; i++) {
01334 if ( dssp_array[i-1] & ds4Turn && dssp_array[i] & ds4Turn ) {
01335 dssp_array[i] |= ds4Helix;
01336 dssp_array[i+1] |= ds4Helix;
01337 dssp_array[i+2] |= ds4Helix;
01338 dssp_array[i+3] |= ds4Helix;
01339 }
01340 }
01341
01342 for (int i = 1; i < len ; i++) {
01343 if ( dssp_array[i-1] & ds5Turn && dssp_array[i] & ds5Turn ) {
01344 dssp_array[i] |= ds5Helix;
01345 dssp_array[i+1] |= ds5Helix;
01346 dssp_array[i+2] |= ds5Helix;
01347 dssp_array[i+3] |= ds5Helix;
01348 dssp_array[i+4] |= ds5Helix;
01349 dssp_array[i+5] |= ds5Helix;
01350 }
01351 }
01352 for (int i = 0; i < len; i++) {
01353 if ( dssp_array[i] & dsParallelBridge || dssp_array[i] & dsAntiParallelBridge ) {
01354 residue[i].SS = 'E';
01355 } else if ( dssp_array[i] & ds3Helix || dssp_array[i] & ds4Helix || dssp_array[i] & ds5Helix ) {
01356 residue[i].SS = 'H';
01357 } else {
01358 residue[i].SS = 'C';
01359 }
01360 }
01361
01362 free(dssp_array);
01363 return 0;
01364 }
01365
01366
01367
01368
01369 #define D_CB_CA 1.538
01370 #define A_CB_CA_N 113.5
01371 #define A_CB_CA_C 108
01372
01373
01374 int ResidueAtoms_CalcCb(ResidueAtoms *res) {
01375
01376
01377
01378
01379 double n2, c2, ca2;
01380 double d_ca_n, d_ca_c;
01381 double ntemp, ctemp;
01382 double xz, xz0;
01383 double yz, yz0;
01384 double zz, zz0;
01385 double x1,y1,z1,x2,y2,z2,z,d1,d2;
01386 double tx=19.491, ty=5.806, tz=22.004;
01387 int Gly_flag = 1;
01388
01389
01390 vec3 n, o, c, ca, cb;
01391
01392 if ( res->bb_num[ anN ] == -1 || res->bb_num[ anO ] == -1 || res->bb_num[ anC ] == -1 || res->bb_num[ anCa ] == -1 ) {
01393 return 1;
01394 }
01395
01396 n[0] = res->atom[ res->bb_num[ anN ] ][0];
01397 n[1] = res->atom[ res->bb_num[ anN ] ][1];
01398 n[2] = res->atom[ res->bb_num[ anN ] ][2];
01399 o[0] = res->atom[ res->bb_num[ anO ] ][0];
01400 o[1] = res->atom[ res->bb_num[ anO ] ][1];
01401 o[2] = res->atom[ res->bb_num[ anO ] ][2];
01402 c[0] = res->atom[ res->bb_num[ anC ] ][0];
01403 c[1] = res->atom[ res->bb_num[ anC ] ][1];
01404 c[2] = res->atom[ res->bb_num[ anC ] ][2];
01405 ca[0] = res->atom[ res->bb_num[ anCa ] ][0];
01406 ca[1] = res->atom[ res->bb_num[ anCa ] ][1];
01407 ca[2] = res->atom[ res->bb_num[ anCa ] ][2];
01408
01409
01410 n2 = n[0]*n[0]+n[1]*n[1]+n[2]*n[2];
01411 c2 = c[0]*c[0]+c[1]*c[1]+c[2]*c[2];
01412 ca2 = ca[0]*ca[0]+ca[1]*ca[1]+ca[2]*ca[2];
01413
01414 d_ca_n= (ca[0]-n[0])*(ca[0]-n[0])+(ca[1]-n[1])*(ca[1]-n[1])+(ca[2]-n[2])*(ca[2]-n[2]);
01415 d_ca_n= sqrt(d_ca_n);
01416 d_ca_c= (ca[0]-c[0])*(ca[0]-c[0])+(ca[1]-c[1])*(ca[1]-c[1])+(ca[2]-c[2])*(ca[2]-c[2]);
01417 d_ca_c= sqrt(d_ca_c);
01418
01419 ntemp = d_ca_n*d_ca_n-2*d_ca_n*D_CB_CA*cos(M_PI*A_CB_CA_N/180.0);
01420 ntemp = (ntemp+ca2-n2)/2;
01421
01422 ctemp = d_ca_c*d_ca_c -2*d_ca_c*D_CB_CA*cos(M_PI*A_CB_CA_C/180.0);
01423 ctemp = (ctemp+ca2-c2)/2;
01424
01425 yz = (ca[0]-c[0])*(ca[1]-n[1])-(ca[0]-n[0])*(ca[1]-c[1]);
01426 if(yz==0) {
01427 Gly_flag = 0;
01428
01429 }
01430 yz0 = (ntemp*(ca[0]-c[0])-ctemp*(ca[0]-n[0]))/yz;
01431 yz = ((n[2]-ca[2])*(ca[0]-c[0])-(c[2]-ca[2])*(ca[0]-n[0]))/yz;
01432
01433 xz0 = (ntemp-yz0*(ca[1]-n[1]))/(ca[0]-n[0]);
01434 xz = ((n[2]-ca[2])-yz*(ca[1]-n[1]))/(ca[0]-n[0]);
01435 if(ca[0]==n[0]) {
01436
01437 Gly_flag = 0;
01438 }
01439
01440 z = 1+xz*xz+yz*yz;
01441 zz = -(xz*(xz0-ca[0])+yz*(yz0-ca[1])-ca[2])/z;
01442 zz0 = (xz0-ca[0])*(xz0-ca[0])+(yz0-ca[1])*(yz0-ca[1])+ca[2]*ca[2]-D_CB_CA*D_CB_CA;
01443
01444 zz0 = (xz*(xz0-ca[0])+yz*(yz0-ca[1])-ca[2])*(xz*(xz0-ca[0])+yz*(yz0-ca[1])-ca[2])-z*zz0;
01445
01446 if(zz0<0) {
01447 Gly_flag = 0;
01448
01449 }
01450 zz0 = sqrt(zz0)/z;
01451
01452
01453 z1 = zz+zz0;
01454 z2 = zz-zz0;
01455 x1 = xz0+xz*z1;
01456 x2 = xz0+xz*z2;
01457 y1 = yz0+yz*z1;
01458 y2 = yz0+yz*z2;
01459
01460
01461
01462 d1 = (x1-o[0])*(x1-o[0])+(y1-o[1])*(y1-o[1])+(z1-o[2])*(z1-o[2]);
01463 d2 = (x2-o[0])*(x2-o[0])+(y2-o[1])*(y2-o[1])+(z2-o[2])*(z2-o[2]);
01464
01465 if(d1>d2){
01466 cb[0]=x1;
01467 cb[1]=y1;
01468 cb[2]=z1;
01469 } else {
01470 cb[0]=x2;
01471 cb[1]=y2;
01472 cb[2]=z2;
01473 }
01474 if (Gly_flag == 0) {
01475 x1=(n[0]+c[0])/2;
01476 y1=(n[1]+c[1])/2;
01477 z1=(n[2]+c[2])/2;
01478 x1=ca[0]-x1;
01479 y1=ca[1]-y1;
01480 z1=ca[2]-z1;
01481 zz=sqrt(x1*x1+y1*y1+z1*z1);
01482 x1=x1*D_CB_CA/zz;
01483 y1=y1*D_CB_CA/zz;
01484 z1=z1*D_CB_CA/zz;
01485 cb[0]=ca[0]+x1;
01486 cb[1]=ca[1]+y1;
01487 cb[2]=ca[2]+z1;
01488 }
01489 if ( res->bb_num[ anCb ] == -1 ) {
01490 res->bb_num[ anCb ] = res->atom_count;
01491 res->atom_count++;
01492 }
01493 res->atom[ res->bb_num[ anCb ] ][0] = cb[0];
01494 res->atom[ res->bb_num[ anCb ] ][1] = cb[1];
01495 res->atom[ res->bb_num[ anCb ] ][2] = cb[2];
01496 return 0;
01497 }
01498
01499
01500
01501
01502
01503 char * TargetStruct::WritePDBText(int start, int stop) {
01504 if ( stop < 0 || stop > this->len)
01505 stop = this->len;
01506 if (start < 0)
01507 start = 0;
01508
01509 char *outtext = NULL;
01510 char buffer[ 4000 ];
01511 int atom_count = 1;
01512 for (int i = start; i < stop; i++) {
01513
01514
01515
01516
01517 for (int j = 0; j < this->structure[i].atom_count; j++) {
01518
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534 char *tmp_text = this->structure[i].WriteAtomLine( j, this->residue[i].RS, i, this->residue[i].chain );
01535 if ( tmp_text ) {
01536 append_to_str(&outtext, tmp_text);
01537 free( tmp_text );
01538 }
01539 }
01540 }
01541 return outtext;
01542 }
01543
01544
01545 int TargetStruct::WritePDB(char *outpath, int start, int stop) {
01546 FILE *file = NULL;
01547
01548 if (outpath == NULL) {
01549 file = stdout;
01550 } else {
01551 file=fopen(outpath, "w");
01552 }
01553 if (file == NULL) {
01554 return 1;
01555 }
01556 char *text = WritePDBText(start, stop);
01557 fprintf(file, "%s", text);
01558 free(text);
01559 if(file != stdout) {
01560 fclose(file);
01561 }
01562 return 0;
01563 }
01564
01565
01566
01567 void TargetStruct::CleanUndefinedResidues(void) {
01568
01569 if ( structure == NULL )
01570 return;
01571
01572 int new_len = 0;
01573 for ( int i = 0; i < len; i++ ) {
01574 if ( structure[i].bb_num[ anCa ] != -1 && structure[i].bb_num[ anN ] != -1 &&
01575 structure[i].bb_num[ anN ] != -1 && structure[i].bb_num[ anO ] != -1 ) {
01576 new_len++;
01577 }
01578 }
01579 if ( new_len != len ) {
01580 ResidueAtoms *old_structure = new ResidueAtoms[ len ];
01581 TargetResidue *old_residue = new TargetResidue[ len ];
01582 for ( int i = 0; i < len; i++ ) {
01583 old_structure[i] = structure[i];
01584 old_residue[i] = residue[i];
01585 }
01586 int old_len = len;
01587 SetLen(new_len);
01588 int cur_res = 0;
01589 for ( int i = 0; i < old_len; i++ ) {
01590 if ( old_structure[i].bb_num[ anCa ] != -1 && old_structure[i].bb_num[ anN ] != -1 &&
01591 old_structure[i].bb_num[ anN ] != -1 && old_structure[i].bb_num[ anO ] != -1 ) {
01592 structure[cur_res] = old_structure[ i ];
01593 residue[cur_res] = old_residue[ i ];
01594 cur_res++;
01595 }
01596 }
01597 }
01598 }
01599
01600
01601
01602 char *TargetStruct::GetSeq(void) {
01603 char *out = (char *)malloc(sizeof(char) * (len+1));
01604 for (int i = 0; i < len; i++) {
01605 out[i] = residue[i].RS;
01606 }
01607 out[ len ] = 0;
01608 return out;
01609 }
01610
01611
01612 float choufasman_a[20] = {
01613 142,
01614 98,
01615 101,
01616 67,
01617 70,
01618 151 ,
01619 111 ,
01620 57 ,
01621 100 ,
01622 108 ,
01623 121 ,
01624 114 ,
01625 145 ,
01626 113 ,
01627 57 ,
01628 77 ,
01629 83 ,
01630 108 ,
01631 69 ,
01632 106
01633 };
01634
01635 float choufasman_b[20] = {
01636 83,
01637 93,
01638 54,
01639 89,
01640 119,
01641 037,
01642 110,
01643 75,
01644 87,
01645 160,
01646 130,
01647 74,
01648 105,
01649 138,
01650 55,
01651 75,
01652 119,
01653 137,
01654 147,
01655 170
01656 };
01657
01658 float choufasman_t[20] = {
01659 66,
01660 95,
01661 146,
01662 156,
01663 119,
01664 74,
01665 98,
01666 156,
01667 95,
01668 47,
01669 59,
01670 101,
01671 60,
01672 60,
01673 152,
01674 143,
01675 96,
01676 96,
01677 114,
01678 50
01679 };
01680
01681 float choufasman_f[20][4] = {
01682 {0.06 , 0.076, 0.035, 0.058},
01683 {0.070, 0.106, 0.099, 0.085},
01684 {0.147, 0.110, 0.179, 0.081},
01685 {0.161, 0.083, 0.191, 0.091},
01686 {0.149, 0.050, 0.117, 0.128},
01687 {0.056, 0.060, 0.077, 0.064},
01688 {0.074, 0.098, 0.037, 0.098},
01689 {0.102, 0.085, 0.190, 0.152},
01690 {0.140, 0.047, 0.093, 0.054},
01691 {0.043, 0.034, 0.013, 0.056},
01692 {0.061, 0.025, 0.036 , 0.070},
01693 {0.055, 0.115, 0.072, 0.095},
01694 {0.068, 0.082, 0.014, 0.055},
01695 {0.059, 0.041, 0.065, 0.065},
01696 {0.102, 0.301, 0.034, 0.068},
01697 {0.120, 0.139, 0.125, 0.106},
01698 {0.086, 0.108, 0.065, 0.079},
01699 {0.077, 0.013, 0.064, 0.167},
01700 {0.082, 0.065, 0.114, 0.125},
01701 {0.062, 0.048, 0.028, 0.053}
01702 };
01703
01704 void TargetStruct::ChouFasman(void) {
01705
01706 for ( int i = 0; i < len-6; i++) {
01707 int a_hit_count = 0;
01708 for ( int j = 0; j < 6; j++ ) {
01709 if ( choufasman_a[ AA1toAANum( residue[i+j].RS ) ] > 100 ) {
01710 a_hit_count++;
01711 }
01712 }
01713 if ( a_hit_count >= 4 ) {
01714
01715 }
01716
01717
01718
01719
01720 }
01721 }
01722
01723
01724 void TargetStruct::MoveMeanToOrigin(void) {
01725 if ( structure == NULL )
01726 return;
01727 vec3 mean = {0,0,0};
01728 int atom_count = 0;
01729 for (int i = 0; i < len; i++) {
01730 ResidueAtoms *res = &structure[i];
01731 for ( int j = 0; j < res->atom_count; j++) {
01732 mean[0] += res->atom[j][0];
01733 mean[1] += res->atom[j][1];
01734 mean[2] += res->atom[j][2];
01735 atom_count++;
01736 }
01737 }
01738 mean[0] /= (double)atom_count;
01739 mean[1] /= (double)atom_count;
01740 mean[2] /= (double)atom_count;
01741 for (int i = 0; i < len; i++) {
01742 ResidueAtoms *res = &structure[i];
01743 for ( int j = 0; j < res->atom_count; j++) {
01744 res->atom[j][0] -= mean[0];
01745 res->atom[j][1] -= mean[1];
01746 res->atom[j][2] -= mean[2];
01747 }
01748 }
01749 }
01750
01751
01752
01753 void TargetStruct::Translate(vec3 trans) {
01754 if (structure == NULL) return;
01755 for (int i = 0; i < len; i++) {
01756 smAdd(trans, structure[i].atom, structure->atom_count);
01757 }
01758 }
01759
01760 void TargetStruct::Rotate (mat3 rotate){
01761 if (structure == NULL) return;
01762 for (int i = 0; i < len; i++) {
01763 smMultMat(rotate, structure[i].atom, structure->atom_count);
01764 }
01765 }
01766
01767
01768 int TargetStruct::GetCaCount(void) {
01769 if ( structure == NULL ) return 0;
01770 return len;
01771 }
01772
01773 vec3 *TargetStruct::GetCaArray(void) {
01774 if ( structure == NULL ) return NULL;
01775 vec3* array = (vec3 *)malloc(sizeof(vec3) * len );
01776 for (int i = 0; i < len; i++) {
01777 smCopy( structure[i].atom[ structure[i].bb_num[ anCa ] ], array[i] );
01778 }
01779 return array;
01780 }
01781
01782
01783