00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <ctype.h>
00006 #include <assert.h>
00007
00008
00009 #include <libxml/tree.h>
00010 #include <libxml/parser.h>
00011 #include <libxml/xpath.h>
00012 #include <libxml/xpathInternals.h>
00013
00014 #include "open_prospect.h"
00015 #include "sm_matrix.h"
00016
00017
00018 extern "C" {
00019 #include "foldrec_net.h"
00020 }
00021
00022
00023
00024
00025
00026
00027
00028
00029 xmlNodePtr get_first_child(xmlNodePtr parent, char *str);
00030 char *get_first_child_content( xmlNodePtr parent, char *str);
00031
00033
00035
00036
00037
00038
00039 ProspectOutput::ProspectOutput(char *path) {
00040 reorder = NULL;
00041 sort_val = NULL;
00042 output_xml_handle = NULL;
00043 if (path != NULL)
00044 Load(path);
00045 }
00046
00047
00048 ProspectOutput::~ProspectOutput() {
00049 Free();
00050 }
00051
00052
00053 void ProspectOutput::Free(void){
00054 if (output_xml_handle != NULL)
00055 xmlFreeDoc( (xmlDocPtr) output_xml_handle );
00056 output_xml_handle = NULL;
00057 if (reorder != NULL)
00058 free(reorder);
00059 reorder = NULL;
00060 if ( sort_val )
00061 delete sort_val;
00062 sort_val = NULL;
00063 }
00064
00065
00066
00067 int ProspectOutput::Load( char *path) {
00068 output_xml_handle = xmlParseFile( path );
00069 if (output_xml_handle == NULL) {
00070
00071 return 1;
00072 }
00073
00074 return 0;
00075 }
00076
00077
00078 int ProspectOutput::AddThreadingInfo( ProspectThreadingInfo *thread_info ) {
00079 if ( output_xml_handle == NULL ) {
00080 output_xml_handle = (void *)xmlNewDoc( (xmlChar *)"1.0" );
00081 xmlDocSetRootElement( (xmlDoc *)output_xml_handle, xmlNewNode( NULL, (xmlChar *)"prospectOutput" ));
00082 }
00083 if ( thread_info->xml_node == NULL ) {
00084 return 1;
00085 }
00086 xmlNodePtr new_node = xmlCopyNode( (xmlNode *)thread_info->xml_node, 1);
00087 xmlNodePtr root_node = xmlDocGetRootElement( (xmlDoc *)output_xml_handle );
00088 xmlAddChild(root_node, new_node);
00089 return 0;
00090 }
00091
00092
00093
00094 int ProspectOutput::AddThreadingInfo( ProspectParam *param,
00095 WeightArray weight,
00096 TemplateStruct *template_data,
00097 TargetStruct *target_data,
00098 ScoreStruct *score,
00099 AlignmentStruct *alignment) {
00100
00101 if ( output_xml_handle == NULL ) {
00102 output_xml_handle = (void *)xmlNewDoc( (xmlChar *)"1.0" );
00103 xmlDocSetRootElement( (xmlDoc *)output_xml_handle, xmlNewNode( NULL, (xmlChar *)"prospectOutput" ));
00104 }
00105
00106 char *xml_text = alignment->Write_XML(param,
00107 weight,
00108 template_data,
00109 target_data,
00110 score);
00111 if ( xml_text != NULL ) {
00112 xmlNodePtr root_node = xmlDocGetRootElement( (xmlDoc *)output_xml_handle );
00113 xmlDocPtr tmp_doc = xmlParseMemory (xml_text, strlen( xml_text ) );
00114 xmlNodePtr new_node = xmlDocGetRootElement( tmp_doc );
00115 xmlUnlinkNode(new_node);
00116 xmlAddChild (root_node,new_node);
00117 xmlFreeDoc( tmp_doc );
00118 free( xml_text );
00119 }
00120 return 0;
00121 }
00122
00123
00124
00125
00126 int ProspectOutput::Save( char *path ) {
00127 return xmlSaveFile( path, (xmlDoc *)output_xml_handle);
00128 }
00129
00130
00131
00132 int ProspectOutput::GetThreadingCount(void ) {
00133 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00134 if ( doc == NULL )
00135 return 0;
00136 xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00137 if (cur_node == NULL)
00138 return 0;
00139 if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00140 cur_node = cur_node->children;
00141 }
00142 if (cur_node == NULL)
00143 return 0;
00144 int count = 0;
00145 while (cur_node != NULL) {
00146 if (!strcmp((char *)cur_node->name, "threading")) {
00147 count++;
00148 }
00149 cur_node = cur_node->next;
00150 }
00151 return count;
00152 }
00153
00154
00155 char * ProspectOutput::GetTemplateName( int N) {
00156 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00157 xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00158 if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00159 cur_node = cur_node->children;
00160 }
00161 int count = 0;
00162 while (cur_node != NULL) {
00163 if (!strcmp((char *)cur_node->name, "threading")) {
00164 if ( count == N ) {
00165 return (char *)xmlGetProp(cur_node, (xmlChar *)"template");
00166 }
00167 count++;
00168 }
00169 cur_node = cur_node->next;
00170 }
00171 return NULL;
00172 }
00173
00174
00175 int ProspectOutput::GetTemplateNum( char *name) {
00176 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00177 xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00178 if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00179 cur_node = cur_node->children;
00180 }
00181 int count = 0, match_num = -1;
00182 while (cur_node != NULL) {
00183 if (!strcmp((char *)cur_node->name, "threading")) {
00184 char *cur_name = (char *)xmlGetProp(cur_node, (xmlChar *)"template");
00185 if (cur_name) {
00186 if ( !strcmp( cur_name, name ) ) {
00187 match_num = count;
00188 cur_node = NULL;
00189 }
00190 free(cur_name);
00191 }
00192 count++;
00193 }
00194 if ( cur_node != NULL)
00195 cur_node = cur_node->next;
00196 }
00197 return match_num;
00198 }
00199
00200
00201 void ProspectOutput::GetRawNormalize( float &mean, float &sd ) {
00202 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00203 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00204 char searchpath[200] = "/prospectOutput/threading/score/raw";
00205 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath,
00206 xpathCtx);
00207 if(xpathObj == NULL) {
00208 xmlXPathFreeContext(xpathCtx);
00209 return;
00210 }
00211 int size = xpathObj->nodesetval->nodeNr;
00212 if ( size > 0 ) {
00213 float *raw_array = new float[ size ];
00214 for(int i = 0; i < size; i++) {
00215 char * text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[i]);
00216 raw_array[i] = atof( text );
00217 free(text);
00218 }
00219
00220 xmlXPathFreeObject(xpathObj);
00221 xmlXPathFreeContext(xpathCtx);
00222 double total = 0.0;
00223 for (int i = 0; i < size; i++) {
00224 total += raw_array[i];
00225 }
00226 mean = total / (double)size;
00227 total = 0.0;
00228 for (int i = 0; i < size; i++) {
00229 total += pow( raw_array[i] - mean, 2 );
00230 }
00231 sd = sqrt( (total / (double)size) );
00232 delete raw_array;
00233 } else {
00234 mean = 0;
00235 sd = 0;
00236 }
00237 }
00238
00239
00240 xmlNodePtr Prospect_OutGetThreadingNode( xmlDocPtr output_xml_handle, int N) {
00241 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00242 xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00243 if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00244 cur_node = cur_node->children;
00245 }
00246 int count = 0;
00247 while (cur_node != NULL) {
00248 if (!strcmp((char *)cur_node->name, "threading")) {
00249 if ( count == N ) {
00250 return cur_node;
00251 }
00252 count++;
00253 }
00254 cur_node = cur_node->next;
00255 }
00256 return NULL;
00257 }
00258
00259
00260 typedef struct {
00261 int rank;
00262 float value;
00263 } sort_feature;
00264
00265 int cmp_sort_feature(const void *arg1, const void *arg2) {
00266 sort_feature *stat1 = (sort_feature*)arg1,
00267 *stat2 = (sort_feature*)arg2;
00268 if ( stat1->value < stat2->value )
00269 return 1;
00270 else
00271 return -1;
00272 return 0;
00273 }
00274
00275
00276 void ProspectOutput::Sort(char *eqn, bool minimize) {
00277 ThreadFeatureParse *feature_eqn = ThreadFeatureParse::CompileParse( eqn );
00278 int N = GetThreadingCount();
00279 sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289 if ( sort_val == NULL ) {
00290 sort_val = new float[ N ];
00291 }
00292 float *tmp_array = feature_eqn->RunParse( this );
00293 for (int i = 0; i < N; i++) {
00294 features[i].value = tmp_array[i];
00295
00296 sort_val[i] = tmp_array[i];
00297 if (minimize)
00298 features[i].value = -(features[i].value);
00299 features[i].rank = i;
00300 }
00301 free( tmp_array );
00302
00303 qsort( features, N, sizeof(sort_feature), cmp_sort_feature );
00304 if ( reorder )
00305 free( reorder );
00306 reorder = (int *)malloc(sizeof(int) * N );
00307 for (int i = 0; i < N; i++) {
00308 reorder[i] = features[i].rank;
00309 }
00310 free(features);
00311 delete feature_eqn;
00312 }
00313
00314
00315 void ProspectOutput::Sort(ThreadFeatureParse *sort_func, bool minimize) {
00316 int N = GetThreadingCount();
00317 sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328 float *tmp_array = sort_func->RunParse( this );
00329 for (int i = 0; i < N; i++) {
00330 features[i].value = tmp_array[i];
00331 if (minimize)
00332 features[i].value = -(features[i].value);
00333 features[i].rank = i;
00334 }
00335 free( tmp_array );
00336
00337 qsort( features, N, sizeof(sort_feature), cmp_sort_feature );
00338 if ( reorder )
00339 free( reorder );
00340 reorder = (int *)malloc(sizeof(int) * N );
00341 for (int i = 0; i < N; i++) {
00342 reorder[i] = features[i].rank;
00343 }
00344 free(features);
00345 }
00346
00347
00348
00349 void ProspectOutput::Sort(int method) {
00350 int N = GetThreadingCount();
00351 sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );
00352
00353 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00354 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00355 if(xpathCtx == NULL) {
00356 return;
00357 }
00358
00359 char searchpath[200] = "/prospectOutput/threading/score/z";
00360
00361 if (method == ScoreStruct::SCORE_RAW)
00362 strcpy( searchpath, "/prospectOutput/threading/score/raw" );
00363 if (method == ScoreStruct::SCORE_Z)
00364 strcpy( searchpath, "/prospectOutput/threading/score/z" );
00365 if (method == ScoreStruct::SCORE_ZFULL)
00366 strcpy( searchpath, "/prospectOutput/threading/score/zfull" );
00367 if (method == ScoreStruct::SCORE_NN)
00368 strcpy( searchpath, "/prospectOutput/threading/score/nn" );
00369 if (method == ScoreStruct::SCORE_ZMUTATIONLOG)
00370 strcpy( searchpath, "/prospectOutput/threading/score/zmutationlog" );
00371 if (method == ScoreStruct::SCORE_ZSINGLETON)
00372 strcpy( searchpath, "/prospectOutput/threading/score/zsingleton" );
00373 if (method == ScoreStruct::SCORE_ZSEC_STRUCT)
00374 strcpy( searchpath, "/prospectOutput/threading/score/zsec_struct" );
00375 if (method == ScoreStruct::SCORE_ZTWOBODY)
00376 strcpy( searchpath, "/prospectOutput/threading/score/ztwobody" );
00377 if (method == ScoreStruct::SCORE_ZDFIRE)
00378 strcpy( searchpath, "/prospectOutput/threading/score/zdfire" );
00379
00380
00381 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath,
00382 xpathCtx);
00383 if(xpathObj == NULL) {
00384 xmlXPathFreeContext(xpathCtx);
00385 return;
00386 }
00387
00388 int size = xpathObj->nodesetval->nodeNr;
00389
00390 for(int i = 0; i < size; i++) {
00391 char * text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[i]);
00392 features[i].value = atof( text );
00393 if ( method == ScoreStruct::SCORE_RAW)
00394 features[i].value = -(features[i].value);
00395 features[i].rank = i;
00396 free(text);
00397 }
00398
00399 xmlXPathFreeObject(xpathObj);
00400 xmlXPathFreeContext(xpathCtx);
00401 qsort( features, size, sizeof(sort_feature), cmp_sort_feature );
00402 if ( reorder )
00403 free( reorder );
00404 reorder = (int *)malloc(sizeof(int) * size );
00405 for (int i = 0; i < size; i++) {
00406 reorder[i] = features[i].rank;
00407 }
00408 free(features);
00409 }
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434 void ProspectOutput::SetSortVal( int i, float val ) {
00435 if ( sort_val == NULL ) {
00436 sort_val = new float[ GetThreadingCount() ];
00437 }
00438 int num = i;
00439 sort_val[ num ] = val;
00440 }
00441
00442
00443
00444 float ProspectOutput::GetSortVal( int i ) {
00445 if (sort_val == NULL)
00446 return 0;
00447 int num = i;
00448 return sort_val[ num ];
00449 }
00450
00451
00452
00453 double get_original_energy( xmlNodePtr node, char *name ) {
00454 xmlNodePtr child;
00455 char *text;
00456 double value;
00457
00458 child = get_first_child( node, name );
00459 if (child) {
00460 text = (char *)xmlGetProp(child, (xmlChar *)"e");
00461 value = atof(text);
00462 free(text);
00463 return value;
00464 }
00465 return 0;
00466 }
00467
00468 double get_weights( xmlNodePtr node, char *name ) {
00469 xmlNodePtr child;
00470 char *text;
00471 double value;
00472
00473 child = get_first_child( node, name );
00474 if (child) {
00475 text = (char *)xmlNodeGetContent(child);
00476 value = atof(text);
00477 free(text);
00478 return value;
00479 }
00480 return 0;
00481 }
00482
00483
00484
00485 int ExtractNodeThreadingInfo( xmlNodePtr threading_node, ProspectThreadingInfo *thread_info) {
00486 thread_info->template_name = (char *)xmlGetProp(threading_node, (xmlChar *)"template");
00487
00488 thread_info->xml_node = (void *)threading_node;
00489
00490 xmlNodePtr score_node = get_first_child( threading_node, "score");
00491
00492 thread_info->scores.mask = 0x00;
00493 for ( int i = 0; i < ScoreStruct::SCORE_COUNT_TOTAL; i++) {
00494 xmlNodePtr score_val_node = get_first_child( score_node, ScoreStruct::GetScoreName(i) );
00495 if (score_val_node != NULL) {
00496 char *ptr = (char *)xmlNodeGetContent(score_val_node);
00497 thread_info->scores.values[ i ] = atof( ptr ) ;
00498 free(ptr);
00499
00500 ptr = (char *)xmlGetProp(score_val_node, (xmlChar *)"mean");
00501 if ( ptr ) {
00502 thread_info->scores.z_mean[ i ] = atof( ptr ) ;
00503 free(ptr);
00504 }
00505 ptr = (char *)xmlGetProp(score_val_node, (xmlChar *)"sd");
00506 if ( ptr ) {
00507 thread_info->scores.z_sd[ i ] = atof( ptr ) ;
00508 free(ptr);
00509 }
00510 thread_info->scores.mask |= (0x01<<i);
00511 }
00512 }
00513
00514
00515 xmlNodePtr alignment_node = get_first_child( threading_node, "alignment" );
00516 xmlNodePtr target_ss_node = get_first_child( alignment_node, "query_ss_" );
00517 xmlNodePtr target_seq_node = get_first_child( alignment_node, "query_seq" );
00518 xmlNodePtr match_node = get_first_child( alignment_node, "match_str" );
00519 xmlNodePtr template_ss_node = get_first_child( alignment_node, "templ_ss_" );
00520 xmlNodePtr template_seq_node = get_first_child( alignment_node, "templ_seq" );
00521 xmlNodePtr template_energy_node = get_first_child( threading_node, "energy");
00522 xmlNodePtr template_weight_node = get_first_child(threading_node, "weights");
00523 xmlNodePtr template_core_node = get_first_child(alignment_node, "core_str_");
00524
00525 thread_info->align_target_seq = (char *)xmlNodeGetContent( target_seq_node );
00526 thread_info->align_target_ss = (char *)xmlNodeGetContent( target_ss_node );
00527 thread_info->align_match = (char *)xmlNodeGetContent( match_node );
00528 thread_info->align_template_seq = (char *)xmlNodeGetContent( template_seq_node );
00529 thread_info->align_template_ss = (char *)xmlNodeGetContent( template_ss_node );
00530
00531 char *core_align = (char *)xmlNodeGetContent( template_core_node );
00532
00533 for (int j = 0; j < e_count; j++) {
00534 thread_info->energy[j] = get_original_energy( template_energy_node, e_names[j]);
00535 }
00536 for (int j = 0; j < e_count; j++) {
00537 thread_info->weight[j] = get_weights( template_weight_node, e_names[j]);
00538 }
00539
00540 int target_pos = 0;
00541 thread_info->target_len = 0;
00542 while( thread_info->align_target_seq[ target_pos ] != 0 ) {
00543 if ( thread_info->align_target_seq[ target_pos ] != '-' ) {
00544 thread_info->target_len++;
00545 }
00546 target_pos++;
00547 }
00548
00549 int template_pos = 0;
00550 thread_info->template_len = 0;
00551 while( thread_info->align_template_seq[ template_pos ] != 0 ) {
00552 if ( thread_info->align_template_seq[ template_pos ] != '-' ) {
00553 thread_info->template_len++;
00554 }
00555 template_pos++;
00556 }
00557
00558
00559 thread_info->target_rs = (char *)malloc( thread_info->target_len );
00560 thread_info->target_ss = (char *)malloc( thread_info->target_len );
00561 thread_info->template_rs = (char *)malloc( thread_info->template_len );
00562 thread_info->template_ss = (char *)malloc( thread_info->template_len );
00563
00564 char *core_rs = (char *)malloc( thread_info->template_len );
00565
00566
00567
00568
00569
00570 int * target_align = (int *)malloc(sizeof(int) * thread_info->target_len );
00571 int * template_align = (int *)malloc(sizeof(int) * thread_info->template_len );
00572
00573 template_pos = 0;
00574 target_pos = 0;
00575 int read_len = strlen(thread_info->align_target_seq);
00576 for (int i = 0; i < read_len ; i++) {
00577 if ( target_pos < thread_info->target_len )
00578 target_align[ target_pos ] = -1;
00579 if ( template_pos < thread_info->template_len )
00580 template_align[ template_pos ] = -1;
00581
00582 if ( thread_info->align_target_seq[i] != '-' && thread_info->align_template_seq[i] != '-' ) {
00583 assert( target_pos < thread_info->target_len );
00584 assert( template_pos < thread_info->template_len );
00585 target_align[ target_pos ] = template_pos;
00586 template_align[ template_pos ] = target_pos;
00587 }
00588 if ( thread_info->align_target_seq[i] != '-' ) {
00589 assert( target_pos < thread_info->target_len );
00590 thread_info->target_rs[ target_pos ] = thread_info->align_target_seq[i];
00591 thread_info->target_ss[ target_pos ] = thread_info->align_target_ss[i];
00592 target_pos++;
00593 }
00594 if ( thread_info->align_template_seq[i] != '-' ) {
00595 assert( template_pos < thread_info->template_len );
00596 thread_info->template_rs[ template_pos ] = thread_info->align_template_seq[i];
00597 thread_info->template_ss[ template_pos ] = thread_info->align_template_ss[i];
00598 core_rs[ template_pos ] = core_align[i];
00599 template_pos++;
00600 }
00601 }
00602 thread_info->align.SetAlign( thread_info->target_len, target_align, thread_info->template_len, template_align );
00603
00604
00605 thread_info->core_count = 0;
00606 for (int i = 0; i < thread_info->template_len; i++) {
00607 if ( core_rs[i] == '-' ) {
00608 if (i == 0 || core_rs[i-1] == ' ') {
00609 thread_info->core_count++;
00610 }
00611 }
00612 }
00613
00614 thread_info->core = (TemplateCoreDefinition *)malloc(sizeof(TemplateCoreDefinition) * thread_info->core_count );
00615
00616 template_pos = 0;
00617 for (int i = 0; i < thread_info->template_len; i++) {
00618 if ( core_rs[i] == '-' ) {
00619 if (i == 0 || core_rs[i-1] == ' ') {
00620 thread_info->core[ template_pos ].head = i;
00621 template_pos++;
00622 }
00623 }
00624 if (core_rs[i] == ' ') {
00625 if (i > 0 && core_rs[i-1] == '-') {
00626 thread_info->core[ template_pos-1 ].len = i - thread_info->core[ template_pos-1 ].head;
00627 }
00628 }
00629 }
00630 if ( core_rs[ thread_info->template_len-1 ] == '-' ) {
00631 thread_info->core[ template_pos-1 ].len =
00632 thread_info->template_len - thread_info->core[ template_pos-1 ].head;
00633 }
00634
00635 free(core_rs);
00636 free(core_align);
00637
00638 thread_info->core_res_count = 0;
00639 for (int i = 0; i < thread_info->core_count; i++) {
00640 thread_info->core_res_count += thread_info->core[ i ].len;
00641 }
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681 {
00682 xmlNodePtr feature_node = get_first_child( threading_node, "features" );
00683 if ( feature_node ) {
00684 for ( int i = 0; i < thread_info->features.Count(); i++ ) {
00685 char *text = get_first_child_content( feature_node, thread_info->features.GetName(i) );
00686 if ( text != NULL ) {
00687 thread_info->features.Set(i, atof( text ) );
00688 free( text );
00689 }
00690 }
00691 }
00692 }
00693
00694
00695 xmlNodePtr sidechain_node = get_first_child( threading_node, "sidechain_pack" );
00696 if ( sidechain_node != NULL ) {
00697 xmlNodePtr lret_node = get_first_child( sidechain_node, "lret" );
00698 if ( lret_node ) {
00699 char *tmp_ptr = (char *)xmlNodeGetContent( lret_node );
00700 thread_info->lret = atof(tmp_ptr);
00701 free(tmp_ptr);
00702 } else {
00703 thread_info->lret = 100000;
00704 }
00705 xmlNodePtr atom_count_node = get_first_child( sidechain_node, "atom_count" );
00706 if ( lret_node ) {
00707 char *tmp_ptr = (char *)xmlNodeGetContent( atom_count_node );
00708 thread_info->atom_count = atoi(tmp_ptr);
00709 free(tmp_ptr);
00710 } else {
00711 thread_info->atom_count = 0;
00712 }
00713 } else {
00714 thread_info->lret = 100000;
00715 thread_info->atom_count = 0;
00716 }
00717
00718 xmlNodePtr pdb_node = get_first_child( threading_node, "pdb" );
00719 if (pdb_node != NULL) {
00720 thread_info->residue_atoms = (ResidueAtoms *)malloc(sizeof(ResidueAtoms) * thread_info->target_len);
00721 for (int z = 0; z < thread_info->target_len; z++)
00722 thread_info->residue_atoms[z].atom_count = 0;
00723 char *read_buffer = (char *)xmlNodeGetContent(pdb_node);
00724 char *cur_line = read_buffer;
00725 int res_num = -1;
00726 char cur_residue_name[30];
00727 strcpy(cur_residue_name, "");
00728 char tmp_x_str[30], tmp_y_str[30], tmp_z_str[30];
00729 strcpy(tmp_x_str, "");
00730 strcpy(tmp_y_str, "");
00731 strcpy(tmp_z_str, "");
00732 while(cur_line[0] != 0) {
00733 if ( !strncmp(cur_line, "ATOM", 4) ) {
00734 char tmp_str[50];
00735 strncpy( tmp_str, &(cur_line[22]), 5 );
00736 tmp_str[5] = 0;
00737 if (strcmp( cur_residue_name, tmp_str ) ) {
00738
00739
00740 char residue_id[10];
00741 strncpy( residue_id, &(cur_line[22]), 5 );
00742 residue_id[5] = 0;
00743
00744 res_num = atoi( residue_id );
00745 thread_info->residue_atoms[res_num].atom_count = 0;
00746 strcpy( cur_residue_name, tmp_str );
00747 }
00748 if (res_num >= 0) {
00749 strncpy(tmp_x_str, &(cur_line[30]), 8);
00750 tmp_x_str[8] = 0;
00751 strncpy(tmp_y_str, &(cur_line[38]), 8);
00752 tmp_y_str[8] = 0;
00753 strncpy(tmp_z_str, &(cur_line[46]), 8);
00754 tmp_z_str[8] = 0;
00755
00756 thread_info->residue_atoms[res_num].atom[thread_info->residue_atoms[res_num].atom_count][0] = atof(tmp_x_str);
00757 thread_info->residue_atoms[res_num].atom[thread_info->residue_atoms[res_num].atom_count][1] = atof(tmp_y_str);
00758 thread_info->residue_atoms[res_num].atom[thread_info->residue_atoms[res_num].atom_count][2] = atof(tmp_z_str);
00759
00760
00761 strncpy( tmp_str, &(cur_line[60]), 6);
00762 tmp_str[6] = 0;
00763 thread_info->residue_atoms[res_num].temp_factor[thread_info->residue_atoms[res_num].atom_count] = atof( tmp_str );
00765
00766
00767
00768
00769
00770
00771 int atom_namenum = PDBAtomtoNum( &(cur_line[13]) );
00772 thread_info->residue_atoms[res_num].atom_name[ thread_info->residue_atoms[res_num].atom_count ] =
00773 atom_namenum;
00774 if ( atom_namenum == anN || atom_namenum == anCa ||
00775 atom_namenum == anC || atom_namenum == anO ) {
00776 thread_info->residue_atoms[res_num].bb_num[ atom_namenum ] = thread_info->residue_atoms[res_num].atom_count;
00777 }
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788 thread_info->residue_atoms[res_num].atom_count++;
00789 }
00790 }
00791 if ( !strncmp(cur_line, "ENDMDL", 6) ) {
00792 break;
00793 }
00794
00795 while (cur_line[0] != '\n' && cur_line[0] != 0) {
00796 cur_line++;
00797 }
00798 if (cur_line[0] == '\n')
00799 cur_line++;
00800 }
00801 free(read_buffer);
00802 } else {
00803 thread_info->residue_atoms = NULL;
00804 }
00805 return 0;
00806 }
00807
00808
00809
00810 ProspectThreadingInfo ProspectOutput::GetThreadingInfo(int N) {
00811 ProspectThreadingInfo my_thread;
00812 GetThreadingInfo( N, &my_thread );
00813 return my_thread;
00814 }
00815
00816
00817 int ProspectOutput::GetThreadingInfo( const char *name, ProspectThreadingInfo *thread_info) {
00818 xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00819 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00820 if(xpathCtx == NULL) {
00821 return 1;
00822 }
00823 char searchpath[2000];
00824 sprintf(searchpath, "/prospectOutput/threading[@template='%s']", name);
00825 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00826 if(xpathObj == NULL) {
00827 xmlXPathFreeContext(xpathCtx);
00828 return 1;
00829 }
00830 int size = xpathObj->nodesetval->nodeNr;
00831 if ( size == 0 )
00832 return 1;
00833 ExtractNodeThreadingInfo( xpathObj->nodesetval->nodeTab[0], thread_info);
00834 thread_info->parent = this;
00835 xmlXPathFreeObject(xpathObj);
00836 xmlXPathFreeContext(xpathCtx);
00837 return 0;
00838 }
00839
00840 int ProspectOutput::GetThreadingInfo( int N, ProspectThreadingInfo *thread_info) {
00841 xmlNodePtr threading_node = Prospect_OutGetThreadingNode( (xmlDocPtr)output_xml_handle, N);
00842 if ( NULL == threading_node ) {
00843 return 1;
00844 }
00845 ExtractNodeThreadingInfo( threading_node, thread_info);
00846 thread_info->parent = this;
00847 return 0;
00848 }
00849
00850
00851 int ProspectOutput::Get_PDBText(int N, ProspectThreadingInfo *thread_info) {
00852 xmlNodePtr threading_node = Prospect_OutGetThreadingNode((xmlDocPtr)output_xml_handle, N);
00853 if ( NULL == threading_node ) {
00854 return 1;
00855 }
00856 xmlNodePtr sidechain_node = get_first_child( threading_node, "sidechain_pack" );
00857 if ( sidechain_node != NULL ) {
00858 xmlNodePtr pdb_node = get_first_child( sidechain_node, "pdb" );
00859 if ( pdb_node != NULL ) {
00860 thread_info->pdb_text = (char *)xmlNodeGetContent( pdb_node );
00861 }
00862 }
00863 return 0;
00864 }
00865
00866
00867 ProspectThreadingInfo::ProspectThreadingInfo(){
00868 parent = NULL;
00869 align_target_seq = NULL;
00870 align_target_ss = NULL;
00871 align_match = NULL;
00872 align_template_seq = NULL;
00873 align_template_ss = NULL;
00874 template_name = NULL;
00875 template_rs = NULL;
00876 pdb_text = NULL;
00877 target_rs = NULL;
00878 template_ss = NULL;
00879 target_ss = NULL;
00880 core = NULL;
00881 xml_node = NULL;
00882 }
00883
00884
00885 ProspectThreadingInfo::~ProspectThreadingInfo() {
00886 Free();
00887 }
00888
00889
00890 void ProspectThreadingInfo::Free(void) {
00891 if (align_target_seq != NULL)
00892 free( align_target_seq );
00893 align_target_seq = NULL;
00894 if (align_target_ss != NULL)
00895 free( align_target_ss );
00896 align_target_ss = NULL;
00897 if (align_match != NULL)
00898 free( align_match );
00899 align_match = NULL;
00900 if (align_template_seq != NULL)
00901 free( align_template_seq );
00902 align_template_seq = NULL;
00903 if (align_template_ss != NULL)
00904 free( align_template_ss );
00905 align_template_ss = NULL;
00906 if (template_name != NULL)
00907 free( template_name );
00908 template_name = NULL;
00909 if (template_rs != NULL)
00910 free(template_rs);
00911 template_rs = NULL;
00912 if (target_rs != NULL)
00913 free(target_rs);
00914 target_rs = NULL;
00915 if (template_ss != NULL)
00916 free(template_ss);
00917 template_ss = NULL;
00918 if (target_ss != NULL)
00919 free(target_ss);
00920 target_ss = NULL;
00921 if (core != NULL)
00922 free(core);
00923 core = NULL;
00924 if (pdb_text != NULL)
00925 free(pdb_text);
00926 pdb_text = NULL;
00927 if (residue_atoms != NULL)
00928 free(residue_atoms);
00929 residue_atoms = NULL;
00930
00931 parent = NULL;
00932 }
00933
00934
00935
00936 void ProspectThreadingInfo::SetScoreExtra(int extra_num, pValType val ) {
00937
00938 int score_num = -1;
00939 switch (extra_num) {
00940 case 0: {
00941 score_num = ScoreStruct::SCORE_USER0;
00942 break;
00943 }
00944 case 1: {
00945 score_num = ScoreStruct::SCORE_USER1;
00946 break;
00947 }
00948 case 2: {
00949 score_num = ScoreStruct::SCORE_USER2;
00950 break;
00951 }
00952 case 3: {
00953 score_num = ScoreStruct::SCORE_USER3;
00954 break;
00955 }
00956 };
00957 if ( score_num == -1 )
00958 return;
00959
00960 scores.SetScore( score_num, val );
00961
00962 if ( xml_node == NULL )
00963 return;
00964
00965
00966 xmlNodePtr threading_node = (xmlNodePtr) xml_node;
00967 xmlNodePtr score_node = get_first_child( threading_node, "score");
00968 xmlNodePtr score_val_node = get_first_child( score_node, ScoreStruct::GetScoreName( score_num ) );
00969
00970 if ( score_val_node != NULL ) {
00971 xmlUnlinkNode(score_val_node);
00972 xmlFreeNode(score_val_node);
00973 }
00974 char buffer[1000];
00975 sprintf(buffer, "%g", val);
00976 score_val_node = xmlNewChild(score_node, NULL, (xmlChar *)ScoreStruct::GetScoreName( score_num ), (xmlChar *)buffer);
00977 }
00978
00979
00980
00981 pValType ProspectThreadingInfo::GetScoreExtra(int extra_num ) {
00982 if (extra_num == 0) return scores.GetScore( ScoreStruct::SCORE_USER0 );
00983 if (extra_num == 1) return scores.GetScore( ScoreStruct::SCORE_USER1 );
00984 if (extra_num == 2) return scores.GetScore( ScoreStruct::SCORE_USER2 );
00985 if (extra_num == 3) return scores.GetScore( ScoreStruct::SCORE_USER3 );
00986 return 0;
00987 }
00988
00989
00990 ProspectThreadingInfo::ProspectThreadingInfo(const ProspectThreadingInfo &rhs) {
00991 Free();
00992 Copy(rhs);
00993 }
00994
00995 ProspectThreadingInfo & ProspectThreadingInfo::operator =(const ProspectThreadingInfo &rhs) {
00996 Free();
00997 Copy(rhs);
00998 return *this;
00999 }
01000
01001
01002
01003 void ProspectThreadingInfo::Copy( const ProspectThreadingInfo &rhs) {
01004 memcpy( this, &rhs, sizeof( ProspectThreadingInfo ) );
01005 if ( rhs.align_target_seq )
01006 align_target_seq = strdup( rhs.align_target_seq );
01007 if ( rhs.align_target_ss )
01008 align_target_ss = strdup( rhs.align_target_ss );
01009 if( rhs.align_match )
01010 align_match = strdup( rhs.align_match );
01011 if( rhs.align_template_seq )
01012 align_template_seq = strdup( rhs.align_template_seq );
01013 if( rhs.align_template_ss )
01014 align_template_ss = strdup( rhs.align_template_ss );
01015 if( rhs.template_name )
01016 template_name = strdup( rhs.template_name );
01017 if( rhs.template_rs )
01018 template_rs = strdup( rhs.template_rs );
01019 if( rhs.target_rs )
01020 target_rs = strdup( rhs.target_rs );
01021 if( rhs.template_ss )
01022 template_ss = strdup( rhs.template_ss );
01023 if( rhs.target_ss )
01024 target_ss = strdup( rhs.target_ss );
01025 if ( rhs.pdb_text )
01026 pdb_text = strdup( rhs.pdb_text );
01027
01028 if ( core_count > 0 ) {
01029 core = (TemplateCoreDefinition *)malloc(sizeof(TemplateCoreDefinition) * core_count);
01030 if ( core )
01031 memcpy( core, rhs.core, sizeof(TemplateCoreDefinition) * core_count );
01032 } else {
01033 core = NULL;
01034 }
01035 if ( atom_count > 0 ) {
01036 residue_atoms = (ResidueAtoms *)malloc(sizeof(ResidueAtoms) * atom_count);
01037 if ( residue_atoms )
01038 memcpy( residue_atoms, rhs.residue_atoms, sizeof(ResidueAtoms) * atom_count );
01039 } else {
01040 residue_atoms = NULL;
01041 }
01042 align = (AlignmentStruct) rhs.align;
01043 parent = rhs.parent;
01044 for ( int i=0; i < e_count; i++ ) {
01045 weight[i] = rhs.weight[i];
01046 energy[i] = rhs.energy[i];
01047 }
01048 }
01049
01050
01051 pValType ProspectThreadingInfo::Score_Calc_NN(void ) {
01052
01053 float nn_input[14];
01054 nn_input[0] = this->scores.values[ ScoreStruct::SCORE_RAW ];
01055 nn_input[1] = this->energy[ e_mutationlog ];
01056 nn_input[2] = this->energy[ e_singleton ];
01057 nn_input[3] = this->energy[ e_gapconst ] + this->energy[ e_gapopen ];
01058 nn_input[4] = this->energy[ e_sec_struct ];
01059 nn_input[5] = this->energy[ e_dfire ];
01060 nn_input[6] = this->scores.values[ ScoreStruct::SCORE_Z ];
01061 nn_input[7] = this->target_len;
01062 nn_input[8] = this->template_len;
01063 nn_input[9] = this->core_count;
01064 nn_input[10] = this->features.Get( AlignFeatures::fn_align_core_res );
01065 nn_input[11] = this->features.Get( AlignFeatures::fn_align_core );
01066 nn_input[12] = this->features.Get( AlignFeatures::fn_align );
01067 nn_input[13] = this->features.Get( AlignFeatures::fn_ident );
01068 float nn_output;
01069 foldrec_net( nn_input, &nn_output, 1 );
01070 return nn_output;
01071 }
01072
01073
01074 pValType ProspectThreadingInfo::GetAveRaw(void) {
01075 pValType total = 0;
01076 pValType nalign = features.Get( AlignFeatures::fn_align );
01077 pValType nalign_pair = features.Get( AlignFeatures::fn_align_pair );
01078 if ( ((int)nalign) == 0)
01079 nalign = 1;
01080 if ( ((int)nalign_pair) == 0)
01081 nalign_pair = 1;
01082 for (int i = 0; i < e_count; i++) {
01083 if ( e_types[i] == et_single ) {
01084 total += (( energy[i] * weight[i] ) / nalign);
01085 }
01086 if ( e_types[i] == et_gap ) {
01087 total += ( energy[i] * weight[i] ) / target_len;
01088 }
01089 if ( e_types[i] == et_pair ) {
01090 total += (( energy[i] * weight[i] ) / nalign_pair);
01091 }
01092 }
01093
01094 return total;
01095 }
01096
01097
01098 void ProspectThreadingInfo::ExtractTarget(TargetStruct *target_in, int start_in, int stop_in) {
01099 int start = start_in;
01100 int stop = stop_in;
01101 if (target_in == NULL)
01102 return;
01103 if (start < 0)
01104 start = 0;
01105 if (stop < 0 || stop > target_len)
01106 stop = target_len;
01107 if ( stop <= start )
01108 return;
01109
01110 int len = stop - start;
01111 target_in->residue = (TargetResidue *)malloc(sizeof(TargetResidue) * len);
01112 target_in->len = len;
01113 target_in->residue_offset = start;
01114 for (int i = start; i < stop; i++) {
01115 int cur = i - start;
01116 int template_pos = align.target_align[ i ];
01117 target_in->residue[ cur ].RS = target_rs[i];
01118 for (int j = 0; j < 21; j++)
01119 target_in->residue[ cur ].FREQ[j] = 0;
01120 target_in->residue[ cur ].FREQ[ AA1toAANum( target_rs[i] ) ] = 1;
01121
01122 if ( template_pos != -1)
01123 target_in->residue[ cur ].SS = template_ss[template_pos];
01124 else
01125 target_in->residue[ cur ].SS = 'L';
01126 for (int j = 0; j < 3; j++) {
01127 target_in->residue[ cur ].SS_PROB[j] = 0.0;
01128 }
01129 target_in->residue[ cur ].SS_PROB[ SS1toSSNum(target_in->residue[ cur ].SS ) ] = 1.0;
01130 target_in->residue[ cur ].orig_num = cur;
01131 }
01132 if ( residue_atoms ) {
01133 target_in->structure = new ResidueAtoms[ len ];
01134 for (int i = start; i < stop; i++) {
01135 int cur = i - start;
01136 target_in->structure[ cur ] = residue_atoms[i] ;
01137 }
01138 } else {
01139 target_in->structure = NULL;
01140 }
01141 }
01142