src/common/prospect_output.cc

00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <ctype.h>
00006 #include <assert.h>
00007 
00008 
00009 #include <libxml/tree.h>
00010 #include <libxml/parser.h>
00011 #include <libxml/xpath.h>
00012 #include <libxml/xpathInternals.h>
00013 
00014 #include "open_prospect.h"
00015 #include "sm_matrix.h"
00016 
00017 
00018 extern "C" {
00019 #include "foldrec_net.h"
00020 }
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 //private functions defined in prospect_variables.cc
00029 xmlNodePtr get_first_child(xmlNodePtr parent, char *str);
00030 char *get_first_child_content( xmlNodePtr parent, char *str);
00031 
00033 //Functions relateing to Prospect Output files
00035 
00036 
00037 
00038 
00039 ProspectOutput::ProspectOutput(char *path) {
00040         reorder = NULL;
00041         sort_val = NULL;
00042         output_xml_handle = NULL;
00043         if (path != NULL)
00044                 Load(path);
00045 }
00046 
00047 
00048 ProspectOutput::~ProspectOutput() {
00049         Free();
00050 }
00051 
00052 
00053 void ProspectOutput::Free(void){
00054         if (output_xml_handle != NULL) 
00055                 xmlFreeDoc( (xmlDocPtr) output_xml_handle );
00056         output_xml_handle = NULL;
00057         if (reorder != NULL)
00058                 free(reorder);
00059         reorder = NULL;
00060         if ( sort_val )
00061                 delete sort_val;
00062         sort_val = NULL;
00063 }
00064 
00065 
00066 
00067 int ProspectOutput::Load( char *path) {
00068         output_xml_handle = xmlParseFile( path );
00069         if (output_xml_handle == NULL) {
00070                 //fprintf(stderr, "Can't open %s\n", argv[1]);
00071                 return 1;
00072         }       
00073         //cur_node = xmlDocGetRootElement(doc); 
00074         return 0;
00075 }
00076 
00077 
00078 int ProspectOutput::AddThreadingInfo( ProspectThreadingInfo *thread_info ) {    
00079         if ( output_xml_handle == NULL ) {
00080                 output_xml_handle = (void *)xmlNewDoc( (xmlChar *)"1.0" );
00081                 xmlDocSetRootElement( (xmlDoc *)output_xml_handle, xmlNewNode( NULL, (xmlChar *)"prospectOutput" ));
00082         }       
00083         if ( thread_info->xml_node == NULL ) {
00084                 return 1;
00085         }       
00086         xmlNodePtr new_node = xmlCopyNode( (xmlNode *)thread_info->xml_node, 1);
00087         xmlNodePtr root_node = xmlDocGetRootElement( (xmlDoc *)output_xml_handle );
00088         xmlAddChild(root_node, new_node);       
00089         return 0;
00090 }
00091 
00092 
00093 
00094 int ProspectOutput::AddThreadingInfo( ProspectParam *param,
00095                                           WeightArray weight,   
00096                                           TemplateStruct *template_data,
00097                                           TargetStruct *target_data, 
00098                                           ScoreStruct *score,
00099                                           AlignmentStruct *alignment) {
00100 
00101         if ( output_xml_handle == NULL ) {
00102                 output_xml_handle = (void *)xmlNewDoc( (xmlChar *)"1.0" );
00103                 xmlDocSetRootElement( (xmlDoc *)output_xml_handle, xmlNewNode( NULL, (xmlChar *)"prospectOutput" ));
00104         }       
00105         
00106         char *xml_text = alignment->Write_XML(param,
00107                                                                                  weight,        
00108                                                                                  template_data,
00109                                                                                  target_data, 
00110                                                                                  score);
00111         if ( xml_text != NULL ) {
00112                 xmlNodePtr root_node = xmlDocGetRootElement( (xmlDoc *)output_xml_handle );
00113                 xmlDocPtr tmp_doc = xmlParseMemory (xml_text, strlen( xml_text ) ); 
00114                 xmlNodePtr new_node = xmlDocGetRootElement( tmp_doc );
00115                 xmlUnlinkNode(new_node);
00116                 xmlAddChild (root_node,new_node); 
00117                 xmlFreeDoc( tmp_doc );  
00118                 free( xml_text );
00119         }       
00120         return 0;
00121 }
00122 
00123 
00124 
00125 
00126 int ProspectOutput::Save( char *path ) {        
00127         return xmlSaveFile( path, (xmlDoc *)output_xml_handle); 
00128 }
00129 
00130 
00131 
00132 int ProspectOutput::GetThreadingCount(void ) {
00133         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00134         if ( doc == NULL )
00135                 return 0;
00136         xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00137         if (cur_node == NULL)
00138                 return 0;
00139         if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00140                 cur_node = cur_node->children;
00141         }
00142         if (cur_node == NULL)
00143                 return 0;
00144         int count = 0;  
00145         while (cur_node != NULL) {
00146                 if (!strcmp((char *)cur_node->name, "threading")) {
00147                         count++;
00148                 }
00149                 cur_node = cur_node->next;
00150         }       
00151         return count;   
00152 }
00153 
00154 
00155 char * ProspectOutput::GetTemplateName( int N) {
00156         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00157         xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00158         if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00159                 cur_node = cur_node->children;
00160         }
00161         int count = 0;  
00162         while (cur_node != NULL) {
00163                 if (!strcmp((char *)cur_node->name, "threading")) {
00164                         if ( count == N ) {
00165                                 return (char *)xmlGetProp(cur_node, (xmlChar *)"template");                             
00166                         }
00167                         count++;
00168                 }
00169                 cur_node = cur_node->next;
00170         }       
00171         return NULL;
00172 }
00173 
00174 
00175 int ProspectOutput::GetTemplateNum( char *name) {
00176         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00177         xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00178         if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00179                 cur_node = cur_node->children;
00180         }
00181         int count = 0, match_num = -1;  
00182         while (cur_node != NULL) {
00183                 if (!strcmp((char *)cur_node->name, "threading")) {
00184                         char *cur_name = (char *)xmlGetProp(cur_node, (xmlChar *)"template");
00185                         if (cur_name) {
00186                                 if ( !strcmp( cur_name, name ) ) {
00187                                         match_num = count;
00188                                         cur_node = NULL;
00189                                 }
00190                                 free(cur_name);
00191                         }
00192                         count++;
00193                 }
00194                 if ( cur_node != NULL)
00195                         cur_node = cur_node->next;
00196         }       
00197         return match_num;
00198 }
00199 
00200 
00201 void ProspectOutput::GetRawNormalize( float &mean, float &sd ) {
00202         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;  
00203         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); /* Create xpath evaluation context */
00204         char searchpath[200] = "/prospectOutput/threading/score/raw";
00205         xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, 
00206                                                                                                                  xpathCtx);             
00207         if(xpathObj == NULL) {
00208                 xmlXPathFreeContext(xpathCtx); 
00209                 return;
00210         }       
00211         int size = xpathObj->nodesetval->nodeNr; 
00212         if ( size > 0 ) {       
00213           float *raw_array = new float[ size ]; 
00214           for(int i = 0; i < size; i++) {
00215             char * text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[i]);
00216             raw_array[i] = atof( text );
00217             free(text);
00218           }     
00219           /* Cleanup of XPath data */
00220           xmlXPathFreeObject(xpathObj);
00221           xmlXPathFreeContext(xpathCtx);        
00222           double total = 0.0;
00223           for (int i = 0; i < size; i++) {
00224             total += raw_array[i];
00225         }
00226           mean = total / (double)size;  
00227           total = 0.0;
00228           for (int i = 0; i < size; i++) {
00229             total += pow( raw_array[i] - mean, 2 );
00230           }
00231           sd = sqrt( (total / (double)size) );
00232           delete raw_array;
00233         } else {
00234           mean = 0;
00235           sd = 0;
00236         }
00237 }
00238 
00239 
00240 xmlNodePtr Prospect_OutGetThreadingNode( xmlDocPtr output_xml_handle, int N) {
00241         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;
00242         xmlNodePtr cur_node = xmlDocGetRootElement(doc);
00243         if ( !strcmp((char *)cur_node->name, "prospectOutput") ) {
00244                 cur_node = cur_node->children;
00245         }
00246         int count = 0;  
00247         while (cur_node != NULL) {
00248                 if (!strcmp((char *)cur_node->name, "threading")) {
00249                         if ( count == N ) {
00250                                 return cur_node;
00251                         }
00252                         count++;
00253                 }
00254                 cur_node = cur_node->next;
00255         }       
00256         return NULL;
00257 }
00258 
00259 
00260 typedef struct {
00261         int rank;
00262         float value;    
00263 } sort_feature; 
00264 
00265 int cmp_sort_feature(const void *arg1, const void *arg2) {
00266         sort_feature *stat1 = (sort_feature*)arg1, 
00267     *stat2 = (sort_feature*)arg2;
00268         if ( stat1->value < stat2->value ) 
00269                 return 1;
00270         else 
00271                 return -1;
00272         return 0;       
00273 }
00274 
00275 
00276 void ProspectOutput::Sort(char *eqn, bool minimize) {
00277         ThreadFeatureParse *feature_eqn = ThreadFeatureParse::CompileParse( eqn );
00278         int N = GetThreadingCount();
00279         sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );     
00280 /*
00281         for (int i = 0; i < N; i++) { 
00282                 ProspectThreadingInfo my_thread_info = GetThreadingInfo( i );
00283                 features[i].value = feature_eqn->RunParse(&my_thread_info) ;
00284                 if (minimize)
00285                         features[i].value = -(features[i].value);
00286                 features[i].rank = i;
00287         }
00288  */
00289         if ( sort_val == NULL ) {
00290                 sort_val = new float[ N ];
00291         }
00292         float *tmp_array = feature_eqn->RunParse( this );
00293         for (int i = 0; i < N; i++) { 
00294                 features[i].value = tmp_array[i];
00295                 //SetSortVal( i, tmp_array[i]);
00296                 sort_val[i] = tmp_array[i];
00297                 if (minimize)
00298                         features[i].value = -(features[i].value);
00299                 features[i].rank = i;
00300         }
00301         free( tmp_array );
00302         
00303         qsort( features, N, sizeof(sort_feature), cmp_sort_feature );   
00304         if ( reorder )
00305                 free( reorder );
00306         reorder = (int *)malloc(sizeof(int) * N );
00307         for (int i = 0; i < N; i++) {
00308                 reorder[i] = features[i].rank;
00309         }
00310         free(features); 
00311         delete feature_eqn;
00312 }
00313 
00314 
00315 void ProspectOutput::Sort(ThreadFeatureParse *sort_func, bool minimize) {
00316         int N = GetThreadingCount();
00317         sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );     
00318 /*
00319         ProspectThreadingInfo my_thread_info;
00320         for (int i = 0; i < N; i++) { 
00321                 GetThreadingInfo( i, &my_thread_info );
00322                 features[i].value = sort_func->RunParse(&my_thread_info) ;
00323                 if (minimize)
00324                         features[i].value = -(features[i].value);
00325                 features[i].rank = i;
00326         }
00327  */
00328         float *tmp_array = sort_func->RunParse( this );
00329         for (int i = 0; i < N; i++) { 
00330                 features[i].value = tmp_array[i];
00331                 if (minimize)
00332                         features[i].value = -(features[i].value);
00333                 features[i].rank = i;
00334         }
00335         free( tmp_array );
00336         
00337         qsort( features, N, sizeof(sort_feature), cmp_sort_feature );   
00338         if ( reorder )
00339                 free( reorder );
00340         reorder = (int *)malloc(sizeof(int) * N );
00341         for (int i = 0; i < N; i++) {
00342                 reorder[i] = features[i].rank;
00343         }
00344         free(features); 
00345 }
00346 
00347 
00348 
00349 void ProspectOutput::Sort(int method) { 
00350         int N = GetThreadingCount();
00351         sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );
00352         
00353         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;  
00354         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); /* Create xpath evaluation context */
00355         if(xpathCtx == NULL) {
00356                 return;
00357         }
00358         
00359         char searchpath[200] = "/prospectOutput/threading/score/z";
00360         
00361         if (method == ScoreStruct::SCORE_RAW)
00362                 strcpy( searchpath, "/prospectOutput/threading/score/raw" );
00363         if (method == ScoreStruct::SCORE_Z)
00364                 strcpy( searchpath, "/prospectOutput/threading/score/z" );
00365         if (method == ScoreStruct::SCORE_ZFULL)
00366                 strcpy( searchpath, "/prospectOutput/threading/score/zfull" );
00367         if (method == ScoreStruct::SCORE_NN)
00368                 strcpy( searchpath, "/prospectOutput/threading/score/nn" );
00369         if (method == ScoreStruct::SCORE_ZMUTATIONLOG)
00370                 strcpy( searchpath, "/prospectOutput/threading/score/zmutationlog" );
00371         if (method == ScoreStruct::SCORE_ZSINGLETON)
00372                 strcpy( searchpath, "/prospectOutput/threading/score/zsingleton" );
00373         if (method == ScoreStruct::SCORE_ZSEC_STRUCT)
00374                 strcpy( searchpath, "/prospectOutput/threading/score/zsec_struct" );
00375         if (method == ScoreStruct::SCORE_ZTWOBODY)
00376                 strcpy( searchpath, "/prospectOutput/threading/score/ztwobody" );
00377         if (method == ScoreStruct::SCORE_ZDFIRE)
00378                 strcpy( searchpath, "/prospectOutput/threading/score/zdfire" );
00379         
00380         /* Evaluate xpath expression */
00381         xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, 
00382                                                                                                                  xpathCtx);             
00383         if(xpathObj == NULL) {
00384                 xmlXPathFreeContext(xpathCtx); 
00385                 return;
00386         }
00387         
00388         int size = xpathObj->nodesetval->nodeNr;
00389         
00390         for(int i = 0; i < size; i++) {
00391                 char * text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[i]);
00392                 features[i].value = atof( text );
00393                 if ( method == ScoreStruct::SCORE_RAW)
00394                         features[i].value = -(features[i].value);
00395                 features[i].rank = i;
00396                 free(text);
00397         }       
00398         /* Cleanup of XPath data */
00399         xmlXPathFreeObject(xpathObj);
00400         xmlXPathFreeContext(xpathCtx); 
00401         qsort( features, size, sizeof(sort_feature), cmp_sort_feature );
00402         if ( reorder )
00403                 free( reorder );
00404         reorder = (int *)malloc(sizeof(int) * size );
00405         for (int i = 0; i < size; i++) {
00406                 reorder[i] = features[i].rank;
00407         }
00408         free(features);
00409 }
00410 
00411 /*
00412 void ProspectOutput::SortScoreUser( bool minimize ) {
00413         int N = GetThreadingCount();
00414         sort_feature *features = (sort_feature *)malloc(sizeof(sort_feature) * N );     
00415         for (int i = 0; i < N; i++) { 
00416                 features[i].value = sort_val[i];
00417                 if (minimize)
00418                         features[i].value = -(features[i].value);
00419                 features[i].rank = i;
00420         }
00421         qsort( features, N, sizeof(sort_feature), cmp_sort_feature );   
00422         if ( reorder )
00423                 free( reorder );
00424         reorder = (int *)malloc(sizeof(int) * N );
00425         for (int i = 0; i < N; i++) {
00426                 reorder[i] = features[i].rank;
00427         }
00428         free(features);         
00429 }
00430 */
00431 
00432 
00433 
00434 void ProspectOutput::SetSortVal( int i, float val  ) {
00435         if ( sort_val == NULL ) {
00436                 sort_val = new float[ GetThreadingCount() ];
00437         }
00438         int num = i;
00439         sort_val[ num ] = val;
00440 }
00441 
00442 
00443 
00444 float ProspectOutput::GetSortVal( int i  ) {
00445         if (sort_val == NULL)
00446                 return 0;
00447         int num = i;
00448         return sort_val[ num ]; 
00449 }
00450 
00451 
00452 
00453 double get_original_energy( xmlNodePtr node, char *name ) {
00454         xmlNodePtr child;
00455         char *text;
00456         double value;
00457         
00458         child = get_first_child( node, name );
00459         if (child) {
00460                 text = (char *)xmlGetProp(child, (xmlChar *)"e");
00461                 value = atof(text);
00462                 free(text);
00463                 return value;   
00464         }
00465         return 0;
00466 }
00467 
00468 double get_weights( xmlNodePtr node, char *name ) {
00469         xmlNodePtr child;
00470         char *text;
00471         double value;
00472         
00473         child = get_first_child( node, name );
00474         if (child) {
00475                 text = (char *)xmlNodeGetContent(child);
00476                 value = atof(text);
00477                 free(text);
00478                 return value;
00479         }
00480         return 0;
00481 }
00482 
00483 
00484 
00485 int ExtractNodeThreadingInfo( xmlNodePtr threading_node, ProspectThreadingInfo *thread_info) {
00486         thread_info->template_name =  (char *)xmlGetProp(threading_node, (xmlChar *)"template");
00487         
00488         thread_info->xml_node = (void *)threading_node;
00489         //get score information
00490         xmlNodePtr score_node = get_first_child( threading_node, "score");
00491         
00492         thread_info->scores.mask = 0x00;
00493         for ( int i = 0; i < ScoreStruct::SCORE_COUNT_TOTAL; i++) {
00494                 xmlNodePtr score_val_node = get_first_child( score_node, ScoreStruct::GetScoreName(i) );
00495                 if (score_val_node != NULL) {
00496                         char *ptr =  (char *)xmlNodeGetContent(score_val_node);
00497                         thread_info->scores.values[ i ] = atof(  ptr ) ;
00498                         free(ptr);
00499                         
00500                         ptr =  (char *)xmlGetProp(score_val_node, (xmlChar *)"mean");
00501                         if ( ptr ) {
00502                                 thread_info->scores.z_mean[ i ] = atof(  ptr ) ;
00503                                 free(ptr);
00504                         }
00505                         ptr =  (char *)xmlGetProp(score_val_node, (xmlChar *)"sd");
00506                         if ( ptr ) {
00507                                 thread_info->scores.z_sd[ i ] = atof(  ptr ) ;
00508                                 free(ptr);
00509                         }
00510                         thread_info->scores.mask |= (0x01<<i);
00511                 } 
00512         }               
00513         
00514         //get alignment information
00515         xmlNodePtr alignment_node = get_first_child( threading_node, "alignment" );
00516         xmlNodePtr target_ss_node = get_first_child( alignment_node, "query_ss_" );
00517         xmlNodePtr target_seq_node = get_first_child( alignment_node, "query_seq" );
00518         xmlNodePtr match_node = get_first_child( alignment_node, "match_str" );
00519         xmlNodePtr template_ss_node = get_first_child( alignment_node, "templ_ss_" );
00520         xmlNodePtr template_seq_node = get_first_child( alignment_node, "templ_seq" );
00521         xmlNodePtr template_energy_node = get_first_child( threading_node, "energy");
00522         xmlNodePtr template_weight_node = get_first_child(threading_node, "weights");
00523         xmlNodePtr template_core_node = get_first_child(alignment_node, "core_str_");
00524         
00525         thread_info->align_target_seq = (char *)xmlNodeGetContent( target_seq_node );
00526         thread_info->align_target_ss = (char *)xmlNodeGetContent( target_ss_node );
00527         thread_info->align_match = (char *)xmlNodeGetContent( match_node );
00528         thread_info->align_template_seq = (char *)xmlNodeGetContent( template_seq_node );
00529         thread_info->align_template_ss = (char *)xmlNodeGetContent( template_ss_node );
00530         
00531         char *core_align = (char *)xmlNodeGetContent( template_core_node );
00532         
00533         for (int j = 0; j < e_count; j++) {
00534                 thread_info->energy[j] = get_original_energy( template_energy_node, e_names[j]);
00535         }
00536         for (int j = 0; j < e_count; j++) {
00537                 thread_info->weight[j] = get_weights( template_weight_node, e_names[j]);
00538         }
00539         
00540         int target_pos = 0;
00541         thread_info->target_len = 0;
00542         while( thread_info->align_target_seq[ target_pos ] != 0 ) {
00543                 if ( thread_info->align_target_seq[ target_pos ] != '-' ) {
00544                         thread_info->target_len++;
00545                 }       
00546                 target_pos++;
00547         }
00548         
00549         int template_pos = 0;
00550         thread_info->template_len = 0;
00551         while( thread_info->align_template_seq[ template_pos ] != 0 ) {
00552                 if ( thread_info->align_template_seq[ template_pos ] != '-' ) {
00553                         thread_info->template_len++;
00554                 }       
00555                 template_pos++;
00556         }
00557         
00558         
00559         thread_info->target_rs = (char *)malloc( thread_info->target_len );
00560         thread_info->target_ss = (char *)malloc( thread_info->target_len );
00561         thread_info->template_rs = (char *)malloc( thread_info->template_len );
00562         thread_info->template_ss = (char *)malloc( thread_info->template_len );
00563         
00564         char *core_rs = (char *)malloc( thread_info->template_len );
00565         
00566         //thread_info->align.target_align = (int *)malloc(sizeof(int) * thread_info->target_len );
00567         //thread_info->align.template_align = (int *)malloc(sizeof(int) * thread_info->template_len );  
00568         //thread_info->align.target_len = thread_info->target_len;
00569         //thread_info->align.template_len = thread_info->template_len;
00570         int * target_align = (int *)malloc(sizeof(int) * thread_info->target_len );
00571         int * template_align = (int *)malloc(sizeof(int) * thread_info->template_len ); 
00572         
00573         template_pos = 0;
00574         target_pos = 0;
00575         int read_len = strlen(thread_info->align_target_seq);
00576         for (int i = 0; i < read_len ; i++) {
00577                 if ( target_pos < thread_info->target_len )
00578                         target_align[ target_pos ] = -1;
00579                 if (  template_pos < thread_info->template_len )
00580                         template_align[ template_pos ] = -1;
00581                 
00582                 if ( thread_info->align_target_seq[i] != '-' && thread_info->align_template_seq[i] != '-' ) {
00583                         assert( target_pos < thread_info->target_len );
00584                         assert(  template_pos < thread_info->template_len );
00585                         target_align[ target_pos ] = template_pos;
00586                         template_align[ template_pos ] = target_pos;            
00587                 }
00588                 if ( thread_info->align_target_seq[i] != '-' ) {
00589                         assert( target_pos < thread_info->target_len );
00590                         thread_info->target_rs[ target_pos ] = thread_info->align_target_seq[i];
00591                         thread_info->target_ss[ target_pos ] = thread_info->align_target_ss[i];
00592                         target_pos++;
00593                 }
00594                 if ( thread_info->align_template_seq[i] != '-' ) {
00595                         assert(  template_pos < thread_info->template_len );
00596                         thread_info->template_rs[ template_pos ] = thread_info->align_template_seq[i];          
00597                         thread_info->template_ss[ template_pos ] = thread_info->align_template_ss[i];           
00598                         core_rs[ template_pos ] = core_align[i];
00599                         template_pos++;
00600                 }       
00601         }
00602         thread_info->align.SetAlign( thread_info->target_len, target_align, thread_info->template_len, template_align );
00603         
00604         //count the cores
00605         thread_info->core_count = 0;
00606         for (int i = 0; i < thread_info->template_len; i++) {
00607                 if ( core_rs[i] == '-' ) {
00608                         if (i == 0 || core_rs[i-1] == ' ') {
00609                                 thread_info->core_count++;
00610                         }
00611                 }
00612         }
00613         
00614         thread_info->core = (TemplateCoreDefinition *)malloc(sizeof(TemplateCoreDefinition) * thread_info->core_count );
00615         
00616         template_pos = 0;
00617         for (int i = 0; i < thread_info->template_len; i++) {
00618                 if ( core_rs[i] == '-' ) {
00619                         if (i == 0 || core_rs[i-1] == ' ') {
00620                                 thread_info->core[ template_pos ].head = i;
00621                                 template_pos++;
00622                         }
00623                 }
00624                 if (core_rs[i] == ' ') {
00625                         if (i > 0 && core_rs[i-1] == '-') {
00626                                 thread_info->core[ template_pos-1 ].len = i - thread_info->core[ template_pos-1 ].head;
00627                         }
00628                 }
00629         }
00630         if ( core_rs[ thread_info->template_len-1 ] == '-' ) {
00631                 thread_info->core[ template_pos-1 ].len = 
00632                 thread_info->template_len - thread_info->core[ template_pos-1 ].head;
00633         }
00634         
00635         free(core_rs);
00636         free(core_align);
00637         
00638         thread_info->core_res_count = 0;
00639         for (int i = 0; i < thread_info->core_count; i++) {
00640                 thread_info->core_res_count += thread_info->core[ i ].len;
00641         }
00642         
00643         /*      
00644                 thread_info->nalign_core_res = 0;
00645          thread_info->nalign_core = 0;
00646          thread_info->nalign = 0;
00647          thread_info->nident = 0;
00648          for (int j = 0; j < thread_info->core_count; j++) {
00649                  if ( thread_info->align.GetTemplateAlign( thread_info->core[j].head ) != -1 ) {
00650                          thread_info->nalign_core++;
00651                  }              
00652          }
00653          for (int j = 0; j < thread_info->core_count; j++) {
00654                  thread_info->nalign_core_res += thread_info->core[j].len;              
00655          }
00656          for (int j = 0; j < thread_info->target_len; j++) {
00657                  if ( thread_info->align.GetTargetAlign(j) != -1 )  {
00658                          thread_info->nalign ++;
00659                          if ( thread_info->target_rs[j] == thread_info->template_rs[ thread_info->align.GetTargetAlign(j) ] ) {
00660                                  thread_info->nident++;
00661                          }
00662                  }
00663          }      
00664          
00665          
00666          {
00667                  char *nalign_ptr = (char *)xmlGetProp(alignment_node, (xmlChar *)"nalign");
00668                  char *npair_align_ptr = (char *)xmlGetProp(alignment_node, (xmlChar *)"npair_align");
00669                  
00670                  if ( nalign_ptr ) {
00671                          thread_info->nalign = atoi( nalign_ptr );
00672                          free( nalign_ptr );
00673                  }
00674                  if ( npair_align_ptr ) {
00675                          thread_info->npair_align = atoi( npair_align_ptr );
00676                          free( npair_align_ptr );
00677                  }
00678                  
00679          }      
00680          */
00681         {
00682                 xmlNodePtr feature_node = get_first_child( threading_node, "features" );                
00683                 if ( feature_node ) {
00684                         for ( int i = 0; i < thread_info->features.Count(); i++ ) {
00685                                 char *text = get_first_child_content( feature_node, thread_info->features.GetName(i) );
00686                                 if ( text != NULL ) {
00687                                         thread_info->features.Set(i, atof( text ) );
00688                                         free( text );
00689                                 }
00690                         }               
00691                 }
00692         }
00693         
00694         
00695         xmlNodePtr sidechain_node = get_first_child( threading_node, "sidechain_pack" );
00696         if ( sidechain_node != NULL ) {
00697                 xmlNodePtr lret_node = get_first_child( sidechain_node, "lret" );
00698                 if ( lret_node ) {
00699                         char *tmp_ptr = (char *)xmlNodeGetContent( lret_node );
00700                         thread_info->lret = atof(tmp_ptr);
00701                         free(tmp_ptr);
00702                 } else {
00703                         thread_info->lret = 100000;
00704                 }               
00705                 xmlNodePtr atom_count_node = get_first_child( sidechain_node, "atom_count" );
00706                 if ( lret_node ) {
00707                         char *tmp_ptr = (char *)xmlNodeGetContent( atom_count_node );
00708                         thread_info->atom_count = atoi(tmp_ptr);
00709                         free(tmp_ptr);
00710                 } else {
00711                         thread_info->atom_count = 0;
00712                 }
00713         } else {
00714                 thread_info->lret = 100000;
00715                 thread_info->atom_count = 0;
00716         }
00717         
00718         xmlNodePtr pdb_node = get_first_child( threading_node, "pdb" );
00719         if (pdb_node != NULL) {
00720                 thread_info->residue_atoms = (ResidueAtoms *)malloc(sizeof(ResidueAtoms) * thread_info->target_len);
00721                 for (int z = 0; z < thread_info->target_len; z++)
00722                         thread_info->residue_atoms[z].atom_count = 0;
00723                 char *read_buffer = (char *)xmlNodeGetContent(pdb_node);
00724                 char *cur_line = read_buffer;
00725                 int res_num = -1;
00726                 char cur_residue_name[30];
00727                 strcpy(cur_residue_name, "");   
00728                 char tmp_x_str[30], tmp_y_str[30], tmp_z_str[30];
00729                 strcpy(tmp_x_str, "");
00730                 strcpy(tmp_y_str, "");
00731                 strcpy(tmp_z_str, "");
00732                 while(cur_line[0] != 0) {
00733                         if ( !strncmp(cur_line, "ATOM", 4) ) {
00734                                 char tmp_str[50];
00735                                 strncpy( tmp_str, &(cur_line[22]), 5 );
00736                                 tmp_str[5] = 0;
00737                                 if (strcmp( cur_residue_name, tmp_str ) ) {
00738                                         //                                      res_num++;
00739                                         //residue id//////////////////////////////////////
00740                                         char residue_id[10];
00741                                         strncpy( residue_id, &(cur_line[22]), 5 );
00742                                         residue_id[5] = 0;
00743                                         //                                      clean_space( residue_id );
00744                                         res_num = atoi( residue_id );
00745                                         thread_info->residue_atoms[res_num].atom_count = 0;
00746                                         strcpy( cur_residue_name, tmp_str );
00747                                 }
00748                                 if (res_num >= 0) {                             
00749                                         strncpy(tmp_x_str, &(cur_line[30]), 8);
00750                                         tmp_x_str[8] = 0;
00751                                         strncpy(tmp_y_str, &(cur_line[38]), 8);
00752                                         tmp_y_str[8] = 0;
00753                                         strncpy(tmp_z_str, &(cur_line[46]), 8);
00754                                         tmp_z_str[8] = 0;
00755                                         //location information/////////////////////////////
00756                                         thread_info->residue_atoms[res_num].atom[thread_info->residue_atoms[res_num].atom_count][0] = atof(tmp_x_str);
00757                                         thread_info->residue_atoms[res_num].atom[thread_info->residue_atoms[res_num].atom_count][1] = atof(tmp_y_str);
00758                                         thread_info->residue_atoms[res_num].atom[thread_info->residue_atoms[res_num].atom_count][2] = atof(tmp_z_str);
00759                                         
00760                                         //Temperature factor/////////////////////////////
00761                                         strncpy( tmp_str, &(cur_line[60]), 6);
00762                                         tmp_str[6] = 0;
00763                                         thread_info->residue_atoms[res_num].temp_factor[thread_info->residue_atoms[res_num].atom_count] = atof( tmp_str );                                      
00765                                         //strncpy( tmp_str, &(cur_line[17]), 3 );
00766                                         //tmp_str[3] = 0;
00767                                         //target_data->residue[res_num].RS = AA3toAA1( tmp_str );                                       
00768                                         //atom name//////////////////////////////////////
00769                                         //strncpy( thread_info->residue_atoms[res_num].name[thread_info->residue_atoms[res_num].atom_count], &(cur_line[13]), 3);
00770                                         //thread_info->residue_atoms[res_num].name[thread_info->residue_atoms[res_num].atom_count][3] = 0;
00771                                         int atom_namenum = PDBAtomtoNum( &(cur_line[13]) );
00772                                         thread_info->residue_atoms[res_num].atom_name[ thread_info->residue_atoms[res_num].atom_count ] = 
00773                                                 atom_namenum;                                   
00774                                         if ( atom_namenum == anN || atom_namenum == anCa || 
00775                                                  atom_namenum == anC || atom_namenum == anO  ) {
00776                                                 thread_info->residue_atoms[res_num].bb_num[ atom_namenum ] = thread_info->residue_atoms[res_num].atom_count;
00777                                         }                               
00778                                         
00779                                         //FIXME:  need to fill in bb_num stuff
00780                                         //      clean_space( the_data->atom[i].atom_name );
00781                                         
00782                                         /*
00783                                          the_data->atom[i].chain = str[21];
00784                                          the_data->atom[i].color[0] = ((float)random()) / (float)LONG_MAX;
00785                                          the_data->atom[i].color[1] = ((float)random()) / (float)LONG_MAX;
00786                                          the_data->atom[i].color[2] = ((float)random()) / (float)LONG_MAX;
00787                                          */
00788                                         thread_info->residue_atoms[res_num].atom_count++;
00789                                 }
00790                         }
00791                         if ( !strncmp(cur_line, "ENDMDL", 6) ) {
00792                                 break;
00793                         }
00794                         
00795                         while (cur_line[0] != '\n' && cur_line[0] != 0) {
00796                                 cur_line++;
00797                         }
00798                         if (cur_line[0] == '\n')
00799                                 cur_line++;             
00800                 }
00801                 free(read_buffer);              
00802         } else {
00803                 thread_info->residue_atoms = NULL;
00804         }       
00805         return 0;       
00806 }
00807 
00808 
00809 
00810 ProspectThreadingInfo ProspectOutput::GetThreadingInfo(int N) { 
00811         ProspectThreadingInfo my_thread;
00812         GetThreadingInfo(  N, &my_thread );     
00813         return my_thread;
00814 }
00815 
00816 
00817 int ProspectOutput::GetThreadingInfo(  const char *name, ProspectThreadingInfo *thread_info) {  
00818         xmlDocPtr doc = (xmlDocPtr) output_xml_handle;  
00819         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); /* Create xpath evaluation context */
00820         if(xpathCtx == NULL) {
00821                 return 1;
00822         }       
00823         char searchpath[2000];
00824         sprintf(searchpath, "/prospectOutput/threading[@template='%s']", name); 
00825         xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);             
00826         if(xpathObj == NULL) {
00827                 xmlXPathFreeContext(xpathCtx); 
00828                 return 1;
00829         }       
00830         int size = xpathObj->nodesetval->nodeNr;        
00831         if ( size == 0 )
00832                 return 1;       
00833         ExtractNodeThreadingInfo( xpathObj->nodesetval->nodeTab[0], thread_info);               
00834         thread_info->parent = this;
00835         xmlXPathFreeObject(xpathObj);
00836         xmlXPathFreeContext(xpathCtx);          
00837         return 0;
00838 }
00839 
00840 int ProspectOutput::GetThreadingInfo(  int N, ProspectThreadingInfo *thread_info) {
00841         xmlNodePtr threading_node = Prospect_OutGetThreadingNode( (xmlDocPtr)output_xml_handle, N);
00842         if ( NULL == threading_node ) {
00843                 return 1;
00844         }
00845         ExtractNodeThreadingInfo( threading_node, thread_info);         
00846         thread_info->parent = this;
00847         return 0;
00848 }
00849 
00850 
00851 int ProspectOutput::Get_PDBText(int N, ProspectThreadingInfo *thread_info) {
00852         xmlNodePtr threading_node = Prospect_OutGetThreadingNode((xmlDocPtr)output_xml_handle, N);
00853         if ( NULL == threading_node ) {
00854                 return 1;
00855         }
00856         xmlNodePtr sidechain_node = get_first_child( threading_node, "sidechain_pack" );
00857         if ( sidechain_node != NULL ) {
00858                 xmlNodePtr pdb_node = get_first_child( sidechain_node, "pdb" );
00859                 if ( pdb_node != NULL ) {
00860                         thread_info->pdb_text = (char *)xmlNodeGetContent( pdb_node );
00861                 }               
00862         }       
00863         return 0;
00864 }
00865 
00866 
00867 ProspectThreadingInfo::ProspectThreadingInfo(){
00868         parent = NULL;
00869         align_target_seq = NULL;
00870         align_target_ss = NULL;
00871         align_match = NULL;
00872         align_template_seq = NULL;
00873         align_template_ss = NULL;
00874         template_name = NULL;
00875         template_rs = NULL;
00876         pdb_text = NULL;
00877         target_rs = NULL;
00878         template_ss = NULL;
00879         target_ss = NULL;
00880         core = NULL;
00881         xml_node = NULL;
00882 }
00883 
00884 
00885 ProspectThreadingInfo::~ProspectThreadingInfo() {
00886         Free(); 
00887 }
00888 
00889 
00890 void ProspectThreadingInfo::Free(void) {
00891         if (align_target_seq != NULL)
00892                 free( align_target_seq );
00893         align_target_seq = NULL;
00894         if (align_target_ss != NULL)
00895                 free( align_target_ss );
00896         align_target_ss = NULL;
00897         if (align_match != NULL)
00898                 free( align_match );
00899         align_match = NULL;
00900         if (align_template_seq != NULL)
00901                 free( align_template_seq );
00902         align_template_seq = NULL;
00903         if (align_template_ss != NULL)
00904                 free( align_template_ss );      
00905         align_template_ss = NULL;
00906         if (template_name != NULL)
00907                 free( template_name );
00908         template_name = NULL;
00909         if (template_rs != NULL)
00910                 free(template_rs);
00911         template_rs = NULL;
00912         if (target_rs != NULL)
00913                 free(target_rs);
00914         target_rs = NULL;
00915         if (template_ss != NULL)
00916                 free(template_ss);
00917         template_ss = NULL;
00918         if (target_ss != NULL)
00919                 free(target_ss);
00920         target_ss = NULL;
00921         if (core != NULL)
00922                 free(core);
00923         core = NULL;
00924         if (pdb_text != NULL)
00925                 free(pdb_text);
00926         pdb_text = NULL;
00927         if (residue_atoms != NULL)
00928                 free(residue_atoms);
00929         residue_atoms = NULL;
00930         
00931         parent = NULL;
00932 }
00933 
00934 
00935 
00936 void ProspectThreadingInfo::SetScoreExtra(int extra_num, pValType val ) {
00937 
00938         int score_num = -1;
00939         switch (extra_num) {
00940                 case 0: {
00941                         score_num = ScoreStruct::SCORE_USER0;
00942                         break;
00943                 }
00944                 case 1: {
00945                         score_num = ScoreStruct::SCORE_USER1;
00946                         break;
00947                 }
00948                 case 2: {
00949                         score_num = ScoreStruct::SCORE_USER2;
00950                         break;
00951                 }
00952                 case 3: {
00953                         score_num = ScoreStruct::SCORE_USER3;
00954                         break;
00955                 }
00956         };
00957         if ( score_num == -1 )
00958                 return;
00959         
00960         scores.SetScore( score_num, val );
00961 
00962         if ( xml_node == NULL )
00963                 return;
00964         
00965         //write into the XML, so the change will be saved
00966         xmlNodePtr threading_node = (xmlNodePtr) xml_node;
00967         xmlNodePtr score_node = get_first_child( threading_node, "score");
00968         xmlNodePtr score_val_node = get_first_child( score_node, ScoreStruct::GetScoreName( score_num ) );
00969         
00970         if ( score_val_node != NULL ) {
00971                 xmlUnlinkNode(score_val_node);
00972                 xmlFreeNode(score_val_node);    
00973         }
00974         char buffer[1000];
00975         sprintf(buffer, "%g", val);
00976         score_val_node = xmlNewChild(score_node, NULL, (xmlChar *)ScoreStruct::GetScoreName( score_num ), (xmlChar *)buffer);
00977 }
00978 
00979 
00980 
00981 pValType ProspectThreadingInfo::GetScoreExtra(int extra_num ) {
00982         if (extra_num == 0)     return scores.GetScore( ScoreStruct::SCORE_USER0 );
00983         if (extra_num == 1)     return scores.GetScore( ScoreStruct::SCORE_USER1 );
00984         if (extra_num == 2)     return scores.GetScore( ScoreStruct::SCORE_USER2 );
00985         if (extra_num == 3)     return scores.GetScore( ScoreStruct::SCORE_USER3 );
00986         return 0;
00987 }
00988 
00989 
00990 ProspectThreadingInfo::ProspectThreadingInfo(const ProspectThreadingInfo &rhs) {
00991         Free();
00992         Copy(rhs);
00993 }
00994 
00995 ProspectThreadingInfo & ProspectThreadingInfo::operator =(const ProspectThreadingInfo &rhs) {
00996         Free();
00997         Copy(rhs);
00998         return *this;
00999 }
01000 
01001 
01002 
01003 void ProspectThreadingInfo::Copy( const ProspectThreadingInfo &rhs) {
01004         memcpy( this, &rhs, sizeof( ProspectThreadingInfo ) );
01005         if ( rhs.align_target_seq )
01006                 align_target_seq = strdup( rhs.align_target_seq );
01007         if ( rhs.align_target_ss )
01008                 align_target_ss = strdup( rhs.align_target_ss );
01009         if( rhs.align_match )
01010                 align_match = strdup( rhs.align_match );
01011         if( rhs.align_template_seq )
01012                 align_template_seq = strdup( rhs.align_template_seq );
01013         if( rhs.align_template_ss )
01014                 align_template_ss = strdup( rhs.align_template_ss );
01015         if( rhs.template_name )
01016                 template_name = strdup( rhs.template_name );
01017         if( rhs.template_rs )
01018                 template_rs = strdup( rhs.template_rs );
01019         if( rhs.target_rs )
01020                 target_rs = strdup( rhs.target_rs );
01021         if( rhs.template_ss )
01022                 template_ss = strdup( rhs.template_ss );
01023         if( rhs.target_ss )
01024                 target_ss = strdup( rhs.target_ss );
01025         if ( rhs.pdb_text )
01026                 pdb_text = strdup( rhs.pdb_text );
01027         
01028         if ( core_count > 0 ) {
01029                 core = (TemplateCoreDefinition *)malloc(sizeof(TemplateCoreDefinition) * core_count);
01030                 if ( core ) 
01031                         memcpy( core, rhs.core, sizeof(TemplateCoreDefinition) * core_count );
01032         } else {
01033                 core = NULL;
01034         }
01035         if ( atom_count > 0 ) {
01036                 residue_atoms = (ResidueAtoms *)malloc(sizeof(ResidueAtoms) * atom_count);
01037                 if ( residue_atoms ) 
01038                         memcpy( residue_atoms, rhs.residue_atoms, sizeof(ResidueAtoms) * atom_count );
01039         } else {
01040                 residue_atoms = NULL;
01041         }
01042         align = (AlignmentStruct) rhs.align;
01043         parent = rhs.parent;
01044         for ( int i=0; i < e_count; i++ ) {
01045                 weight[i] = rhs.weight[i];
01046                 energy[i] = rhs.energy[i];
01047         }
01048 }
01049 
01050 
01051 pValType ProspectThreadingInfo::Score_Calc_NN(void ) {
01052         //format string "%sr %el %e1 %eg %es %ed %sr %fq %ft %fc %am %ac %an %ai"       
01053         float nn_input[14];
01054         nn_input[0] = this->scores.values[ ScoreStruct::SCORE_RAW ];
01055         nn_input[1] = this->energy[ e_mutationlog ];
01056         nn_input[2] = this->energy[ e_singleton ];
01057         nn_input[3] = this->energy[ e_gapconst ] + this->energy[ e_gapopen ];
01058         nn_input[4] = this->energy[ e_sec_struct ];
01059         nn_input[5] = this->energy[ e_dfire ];
01060         nn_input[6] = this->scores.values[ ScoreStruct::SCORE_Z ];
01061         nn_input[7] = this->target_len;
01062         nn_input[8] = this->template_len;
01063         nn_input[9] = this->core_count;         
01064         nn_input[10] = this->features.Get( AlignFeatures::fn_align_core_res );
01065         nn_input[11] = this->features.Get( AlignFeatures::fn_align_core );
01066         nn_input[12] = this->features.Get( AlignFeatures::fn_align );
01067         nn_input[13] = this->features.Get( AlignFeatures::fn_ident );   
01068         float nn_output;
01069         foldrec_net(  nn_input, &nn_output, 1 );
01070         return nn_output;
01071 }
01072 
01073 
01074 pValType ProspectThreadingInfo::GetAveRaw(void) {
01075         pValType total = 0;
01076         pValType nalign = features.Get( AlignFeatures::fn_align );      
01077         pValType nalign_pair = features.Get( AlignFeatures::fn_align_pair );    
01078         if ( ((int)nalign) == 0)
01079                 nalign = 1;
01080         if ( ((int)nalign_pair) == 0)
01081                 nalign_pair = 1;        
01082         for (int i = 0; i < e_count; i++) {
01083                 if ( e_types[i] == et_single ) {
01084                         total += (( energy[i] * weight[i] ) / nalign);
01085                 }
01086                 if ( e_types[i] == et_gap ) {
01087                         total += ( energy[i] * weight[i] ) / target_len;
01088                 }
01089                 if ( e_types[i] == et_pair ) {
01090                         total += (( energy[i] * weight[i] ) / nalign_pair);                     
01091                 }
01092         }
01093         //assert( abs( total ) < 10000 );
01094         return total;
01095 }
01096 
01097 
01098 void ProspectThreadingInfo::ExtractTarget(TargetStruct *target_in, int start_in, int stop_in) {
01099         int start = start_in;
01100         int stop = stop_in;
01101         if (target_in == NULL)
01102                 return;
01103         if (start < 0) 
01104                 start = 0;
01105         if (stop < 0 || stop > target_len)
01106                 stop = target_len;
01107         if ( stop <= start )
01108                 return;
01109         
01110         int len = stop - start;
01111         target_in->residue = (TargetResidue *)malloc(sizeof(TargetResidue) * len);
01112         target_in->len = len;
01113         target_in->residue_offset = start;
01114         for (int i = start; i < stop; i++) {
01115                 int cur = i - start;            
01116                 int template_pos = align.target_align[ i ];
01117                 target_in->residue[ cur ].RS = target_rs[i];
01118                 for (int j = 0; j < 21; j++)
01119                         target_in->residue[ cur ].FREQ[j] = 0;
01120                 target_in->residue[ cur ].FREQ[ AA1toAANum( target_rs[i] ) ] = 1;
01121                 
01122                 if ( template_pos != -1)
01123                         target_in->residue[ cur ].SS = template_ss[template_pos];
01124                 else 
01125                         target_in->residue[ cur ].SS = 'L'; //set residues without matching template structure to loop
01126                 for (int j = 0; j < 3; j++) {
01127                         target_in->residue[ cur ].SS_PROB[j] = 0.0;
01128                 }
01129                 target_in->residue[ cur ].SS_PROB[ SS1toSSNum(target_in->residue[ cur ].SS ) ] = 1.0;
01130                 target_in->residue[ cur ].orig_num = cur;
01131         }
01132         if ( residue_atoms ) {
01133                 target_in->structure = new ResidueAtoms[ len ];
01134                 for (int i = start; i < stop; i++) {
01135                         int cur = i - start;            
01136                         target_in->structure[ cur ] = residue_atoms[i] ;
01137                 }
01138         } else {
01139                 target_in->structure = NULL;
01140         }       
01141 }
01142 

Generated on Wed Apr 11 16:50:50 2007 for open_prospect by  doxygen 1.4.6