00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <ctype.h>
00006 #include <assert.h>
00007
00008
00009 #include <libxml/tree.h>
00010 #include <libxml/parser.h>
00011 #include <libxml/xpath.h>
00012 #include <libxml/xpathInternals.h>
00013
00014 #include "open_prospect.h"
00015 #include "sm_matrix.h"
00016
00017
00018
00022
00023
00024 char f_names[f_count][30] = {
00025 "nalign",
00026 "nident",
00027 "ncontact",
00028 "nhalfcontact",
00029 "nalign_pair",
00030 "nalign_core",
00031 "nalign_core_res",
00032 "ralign",
00033 "rident",
00034 "rcontact",
00035 "rhalfcontact",
00036 "ralign_pair"
00037 "rsurface"
00038 };
00039
00040 char *AlignFeatures::GetName(unsigned int i) {
00041 return f_names[i];
00042 }
00043
00044
00048
00049 ThreadFeatureParse::ThreadFeatureParse() {
00050 ops = NULL;
00051 mem = NULL;
00052 }
00053
00054 ThreadFeatureParse::ThreadFeatureParse( feature_parse_op *op_list, int op_count_in, int mem_size ) {
00055 ops = op_list;
00056 op_count = op_count_in;
00057 mem = new double[ mem_size ];
00058 mem_count = mem_size;
00059 }
00060
00061 ThreadFeatureParse::~ThreadFeatureParse() {
00062 if (ops)
00063 delete ops;
00064 if (mem)
00065 delete mem;
00066 }
00067
00068
00069 ThreadFeatureParse::ThreadFeatureParse( char *eqn_str ) {
00070 ThreadFeatureParse *tmp = CompileParse( eqn_str );
00071 *this = *tmp;
00072 delete tmp;
00073 }
00074
00075 ThreadFeatureParse & ThreadFeatureParse::operator =( const ThreadFeatureParse &rhs ) {
00076 Copy( rhs );
00077 return *this;
00078 }
00079
00080 void ThreadFeatureParse::Copy(const ThreadFeatureParse &rhs) {
00081 op_count = rhs.op_count;
00082 mem_count = rhs.mem_count;
00083 ops = new feature_parse_op[ op_count ];
00084 mem = new double[ mem_count ];
00085 memcpy( ops, rhs.ops, sizeof(feature_parse_op) * op_count );
00086 }
00087
00088
00089
00090 typedef float (*feature_extract_func)(ProspectThreadingInfo *, int arg);
00091
00092 typedef float * (*feature_array_extract_func)(ProspectOutput *, int arg);
00093
00094 typedef struct feature_struct {
00095 char tag[6];
00096 char desc[40];
00097 feature_extract_func extract_func;
00098 feature_array_extract_func extract_array_func;
00099 int extract_arg;
00100 } feature_struct;
00101
00102
/* Selector values passed as `feature` to the structural feature extractors. */
enum {
    f_template_len  = 0,   /* template length */
    f_target_len    = 1,   /* target length */
    f_core_count    = 2,   /* core count */
    f_coreres_len   = 3,
    f_targ_temp_len = 4,   /* target length / template length */
    f_temp_targ_len = 5    /* template length / target length */
};
00109
00110
00111
00112
00113 feature_struct feature_array[] = {
00114
00115
00116 {"sr", "score raw", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_RAW},
00117 {"srz", "score raw", ThreadFeatureParse::get_raw_zscore, ThreadFeatureParse::get_raw_zscore_array, 0},
00118 {"sz", "score z", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_Z},
00119 {"sf", "score zfull", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_ZFULL},
00120
00121 {"sel", "score z mutation log", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_ZMUTATIONLOG},
00122 {"se1", "score z singleton", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_ZSINGLETON},
00123 {"ses", "score z secondary structure", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_ZSEC_STRUCT},
00124 {"se2", "score z twobody", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_ZTWOBODY},
00125 {"sed", "score z dfire", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_ZDFIRE},
00126
00127
00128
00129
00130
00131
00132 {"su0", "score user defined", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_USER0},
00133 {"su1", "score user defined", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_USER1},
00134 {"su2", "score user defined", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_USER2},
00135 {"su3", "score user defined", ThreadFeatureParse::get_score, ThreadFeatureParse::get_score_array, ScoreStruct::SCORE_USER3},
00136
00137 {"szm", "score z mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_Z},
00138 {"szs", "score z sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_Z},
00139 {"sfm", "score zfull mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_ZFULL},
00140 {"sfs", "score zfull sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_ZFULL},
00141 {"selm", "score z mutation log mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_ZMUTATIONLOG},
00142 {"sels", "score z mutation log sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_ZMUTATIONLOG},
00143 {"se1m", "score z singleton mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_ZSINGLETON},
00144 {"se1s", "score z singleton sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_ZSINGLETON},
00145 {"sesm", "score z secondary structure mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_ZSEC_STRUCT},
00146 {"sess", "score z secondary structure sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_ZSEC_STRUCT},
00147 {"se2m", "score z twobody mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_ZTWOBODY},
00148 {"se2s", "score z twobody sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_ZTWOBODY},
00149 {"sedm", "score z dfire mean", ThreadFeatureParse::get_score_mean, ThreadFeatureParse::get_score_array_mean, ScoreStruct::SCORE_ZDFIRE},
00150 {"seds", "score z dfire sd", ThreadFeatureParse::get_score_sd, ThreadFeatureParse::get_score_array_sd, ScoreStruct::SCORE_ZDFIRE},
00151
00152
00153 {"ft", "template length", ThreadFeatureParse::get_struct_feature, ThreadFeatureParse::get_struct_len_array, f_template_len },
00154 {"fq", "target length", ThreadFeatureParse::get_struct_feature, ThreadFeatureParse::get_struct_len_array, f_target_len },
00155 {"fc", "core count", ThreadFeatureParse::get_struct_feature, NULL, f_core_count },
00156
00157 {"fr", "length ratio (fq/ft)", ThreadFeatureParse::get_struct_feature, NULL, f_targ_temp_len},
00158 {"fR", "length ratio (ft/fq)", ThreadFeatureParse::get_struct_feature, NULL, f_temp_targ_len},
00159
00160
00161 {"an", "num align res", ThreadFeatureParse::get_align_feature, ThreadFeatureParse::get_align_feature_array, AlignFeatures::fn_align},
00162 {"aN", "% align res", ThreadFeatureParse::get_align_feature, ThreadFeatureParse::get_align_feature_array, AlignFeatures::fr_align },
00163 {"ac", "num align core", ThreadFeatureParse::get_align_feature,ThreadFeatureParse::get_align_feature_array, AlignFeatures::fn_align_core },
00164
00165 {"ai", "num align ident res", ThreadFeatureParse::get_align_feature,ThreadFeatureParse::get_align_feature_array, AlignFeatures::fn_ident},
00166 {"aI", "% align ident res", ThreadFeatureParse::get_align_feature, ThreadFeatureParse::get_align_feature_array, AlignFeatures::fr_ident},
00167 {"am", "num align core res", ThreadFeatureParse::get_align_feature,ThreadFeatureParse::get_align_feature_array, AlignFeatures::fn_align_core_res },
00168
00169
00170 {"a2c", "num contact pairs", ThreadFeatureParse::get_align_feature, ThreadFeatureParse::get_align_feature_array, AlignFeatures::fn_contact },
00171 {"a2d", "num half contact pairs", ThreadFeatureParse::get_align_feature, ThreadFeatureParse::get_align_feature_array, AlignFeatures::fn_halfcontact },
00172
00173 {"af", "num target alignment fragments", ThreadFeatureParse::get_target_frag_count, NULL, 0},
00174
00175
00176
00177
00178
00179
00180 {"wm", "mutation weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_mutation },
00181 {"wl", "mutation log weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_mutationlog },
00182 {"w1", "singleton weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_singleton },
00183 {"ws", "secondary struct weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_sec_struct },
00184 {"w2", "twobody weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_twobody },
00185 {"wd", "dfire weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_dfire },
00186 {"wo", "gap open weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_gapopen },
00187 {"we", "gap const weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_gapconst },
00188 {"wt", "coredel weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_coredel },
00189 {"wa", "template singledel weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_tsingledel },
00190 {"wi", "template singleins weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_tsingleins },
00191 {"wqa", "target singledel weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_qsingledel },
00192 {"wqi", "target singleins weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_qsingleins },
00193 {"wc", "contactdel weight", ThreadFeatureParse::get_weight, ThreadFeatureParse::get_weight_array, e_contactdel },
00194
00195
00196 {"em", "mutation energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_mutation },
00197 {"el", "mutation log energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_mutationlog },
00198 {"e1", "singleton energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_singleton},
00199 {"es", "secondary struct energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_sec_struct},
00200 {"e2", "twobody energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_twobody },
00201 {"ed", "dfire energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_dfire},
00202 {"et", "coredel energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_coredel},
00203 {"eo", "gap open energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_gapopen},
00204 {"ee", "gap const energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_gapconst},
00205
00206 {"ea", "template singledel energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_tsingledel},
00207 {"ei", "template singleins energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_tsingleins},
00208 {"eqa", "target singledel energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_qsingledel},
00209 {"eqi", "target singleins energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_qsingleins},
00210 {"ec", "contactdel energy", ThreadFeatureParse::get_energy, ThreadFeatureParse::get_energy_array, e_contactdel},
00211
00212
00213 {"emw", "mutation energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_mutation },
00214 {"elw", "mutation log energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_mutationlog },
00215 {"e1w", "singleton energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_singleton},
00216 {"esw", "secondary struct energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_sec_struct},
00217 {"e2w", "twobody energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_twobody },
00218 {"edw", "dfire energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_dfire},
00219 {"etw", "coredel energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_coredel},
00220 {"eow", "gap open energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_gapopen},
00221 {"ecw", "gap const energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_gapconst},
00222
00223 {"eaw", "template singledel energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_tsingledel},
00224 {"eiw", "template singleins energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_tsingleins},
00225 {"eqaw", "query singledel energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_qsingledel},
00226 {"eqiw", "query singleins energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_qsingleins},
00227 {"ecw", "contactdel energy * weight", ThreadFeatureParse::get_energy_weight, ThreadFeatureParse::get_energy_weight_array, e_contactdel}
00228
00229 };
00230
00231
00232 int ThreadFeatureParse::GetFeatureID( char *feature_key) {
00233
00234 for (int i= 0; i < sizeof(feature_array) / sizeof(feature_array[0]); i++) {
00235 if ( !strcmp( feature_array[i].tag, feature_key ) ) {
00236 return i;
00237 }
00238 }
00239 return -1;
00240 }
00241
00242
00243
00244 float ThreadFeatureParse::GetFeatureFloat( ProspectThreadingInfo *thread, char *feature_key) {
00245
00246 for (int i= 0; i < sizeof(feature_array) / sizeof(feature_array[0]); i++) {
00247 if ( !strcmp( feature_array[i].tag, feature_key ) ) {
00248 return feature_array[i].extract_func( thread, feature_array[i].extract_arg );
00249 }
00250 }
00251 return 0;
00252 }
00253
00254
00255 float ThreadFeatureParse::GetFeatureFloat( ProspectThreadingInfo *thread, int feature_id) {
00256 if ( feature_id < 0 || feature_id > sizeof(feature_array) / sizeof(feature_array[0]) )
00257 return 0;
00258 return feature_array[feature_id].extract_func( thread, feature_array[feature_id].extract_arg );
00259 }
00260
00261 char *ThreadFeatureParse::GetFeatureStr( ProspectThreadingInfo *thread, char *feature_key ) {
00262
00263
00264 for (int i= 0; i < sizeof(feature_array) / sizeof(feature_array[0]); i++) {
00265 if ( !strcmp( feature_array[i].tag, feature_key ) ) {
00266 char buffer[100];
00267 sprintf(buffer, "%g", feature_array[i].extract_func( thread, feature_array[i].extract_arg ) );
00268 return strdup(buffer);
00269 }
00270 }
00271 if ( !strcmp( "ss", feature_key ) ) {
00272 if ( thread->parent == NULL )
00273 return NULL;
00274 char buffer[100];
00275 int num = thread->parent->GetTemplateNum( thread->template_name );
00276 float value = thread->parent->GetSortVal(num);
00277 sprintf(buffer, "%g", value );
00278 return strdup( buffer );
00279 }
00280
00281 if ( !strcmp( "n", feature_key ) ) {
00282 return strdup( thread->template_name );
00283 }
00284 return NULL;
00285 }
00286
00287
00288
00289 float ThreadFeatureParse::calc_averaw( ProspectThreadingInfo *thread, int foo) {
00290 pValType total = 0;
00291 for (int i = 0; i < e_count; i++) {
00292 pValType tmp = thread->energy[i] * thread->weight[i];
00293 if ( e_types[i] == et_single )
00294 tmp /= (pValType) thread->features.Get( AlignFeatures::fn_align);
00295 if ( e_types[i] == et_pair )
00296 tmp /= (pValType) thread->features.Get( AlignFeatures::fn_align_pair);
00297 if (!isnan(tmp))
00298 total += tmp;
00299 }
00300 return total;
00301 }
00302
00303
00304 float ThreadFeatureParse::get_raw_zscore( ProspectThreadingInfo *thread, int feature ) {
00305 if (thread->parent == NULL)
00306 return thread->scores.values[ ScoreStruct::SCORE_RAW ];
00307 float mean, sd;
00308 thread->parent->GetRawNormalize( mean, sd );
00309 return (thread->scores.values[ ScoreStruct::SCORE_RAW ] - mean) / sd;
00310 }
00311
00312
00313
00314 float ThreadFeatureParse::get_struct_feature( ProspectThreadingInfo *thread, int feature ) {
00315 switch (feature) {
00316 case f_template_len:
00317 return thread->template_len;
00318 break;
00319 case f_target_len:
00320 return thread->target_len;
00321 break;
00322 case f_core_count:
00323 return thread->core_count;
00324 break;
00325
00326
00327 case f_targ_temp_len:
00328 return ((float)thread->target_len)/((float)thread->template_len);
00329 break;
00330 case f_temp_targ_len:
00331 return ((float)thread->template_len)/((float)thread->target_len);
00332 break;
00333 }
00334 return 0;
00335 }
00336
/*
 * Count the alphabetic characters (per isalpha) in a NUL-terminated string.
 *
 * str - string to scan; NULL is treated as an empty string.
 * Returns the number of characters for which isalpha() is true.
 *
 * Each character is converted to unsigned char before the call: passing a
 * negative char value to isalpha() is undefined behaviour.
 */
int get_alpha_count( char *str) {
    if ( str == NULL )
        return 0;
    int count = 0;
    for (const char *p = str; *p != '\0'; ++p) {
        if ( isalpha( (unsigned char) *p ) )
            count++;
    }
    return count;
}
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359 float *ThreadFeatureParse::get_struct_len_array( ProspectOutput *threads, int feature ) {
00360 char searchpath[200] = "/prospectOutput/threading/alignment/templ_seq";
00361 if ( f_target_len == feature )
00362 sprintf(searchpath, "/prospectOutput/threading/alignment/query_seq" );
00363
00364 int N = threads->GetThreadingCount();
00365 float *scores = (float *)malloc(sizeof(float) * N );
00366
00367 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00368 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00369 if(xpathCtx == NULL) {
00370 return NULL;
00371 }
00372
00373 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00374 if(xpathObj == NULL) {
00375 xmlXPathFreeContext(xpathCtx);
00376 return NULL;
00377 }
00378 int size = xpathObj->nodesetval->nodeNr;
00379 for(int i = 0; i < size; i++) {
00380 char * text;
00381
00382
00383
00384 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00385 scores[i] = get_alpha_count( text );
00386 free(text);
00387 }
00388
00389 xmlXPathFreeObject(xpathObj);
00390 xmlXPathFreeContext(xpathCtx);
00391 return scores;
00392 }
00393
00394
00395 float *ThreadFeatureParse::get_raw_zscore_array( ProspectOutput *threads, int feature ) {
00396 float mean, sd;
00397 threads->GetRawNormalize( mean, sd );
00398 float *array = get_score_array( threads, ScoreStruct::SCORE_RAW );
00399 int N = threads->GetThreadingCount();
00400 for ( int i = 0; i< N; i++) {
00401 array[i] = (array[i] - mean) / sd;
00402 }
00403 return array;
00404 }
00405
00406
00407 float* ThreadFeatureParse::get_score_array( ProspectOutput *threads, int feature ) {
00408 char searchpath[200] = "/prospectOutput/threading/score/z";
00409
00410 sprintf(searchpath, "/prospectOutput/threading/score/%s", ScoreStruct::GetScoreName( feature ) );
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444 int N = threads->GetThreadingCount();
00445 float *scores = (float *)malloc(sizeof(float) * N );
00446 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00447 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00448 if(xpathCtx == NULL) {
00449 return NULL;
00450 }
00451
00452 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00453 if(xpathObj == NULL) {
00454 xmlXPathFreeContext(xpathCtx);
00455 return NULL;
00456 }
00457 int size = xpathObj->nodesetval->nodeNr;
00458 for(int i = 0; i < size; i++) {
00459 char * text;
00460
00461
00462
00463 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00464 scores[i] = atof( text );
00465 free(text);
00466 }
00467
00468 xmlXPathFreeObject(xpathObj);
00469 xmlXPathFreeContext(xpathCtx);
00470 return scores;
00471 }
00472
00473
00474 float* ThreadFeatureParse::get_score_array_mean( ProspectOutput *threads, int feature ) {
00475 char searchpath[200] = "/prospectOutput/threading/score/z@mean";
00476 switch ( feature ) {
00477 case ScoreStruct::SCORE_RAW:
00478 strcpy( searchpath, "/prospectOutput/threading/score/raw@mean" );
00479 break;
00480 case ScoreStruct::SCORE_Z:
00481 strcpy( searchpath, "/prospectOutput/threading/score/z@mean" );
00482 break;
00483 case ScoreStruct::SCORE_ZFULL:
00484 strcpy( searchpath, "/prospectOutput/threading/score/zfull@mean" );
00485 break;
00486 case ScoreStruct::SCORE_NN:
00487 strcpy( searchpath, "/prospectOutput/threading/score/nn@mean" );
00488 break;
00489 case ScoreStruct::SCORE_ZMUTATIONLOG:
00490 strcpy( searchpath, "/prospectOutput/threading/score/zmutationlog@mean" );
00491 break;
00492 case ScoreStruct::SCORE_ZSINGLETON:
00493 strcpy( searchpath, "/prospectOutput/threading/score/zsingleton@mean" );
00494 break;
00495 case ScoreStruct::SCORE_ZSEC_STRUCT:
00496 strcpy( searchpath, "/prospectOutput/threading/score/zsec_struct@mean" );
00497 break;
00498 case ScoreStruct::SCORE_ZTWOBODY:
00499 strcpy( searchpath, "/prospectOutput/threading/score/ztwobody@mean" );
00500 break;
00501 case ScoreStruct::SCORE_ZDFIRE:
00502 strcpy( searchpath, "/prospectOutput/threading/score/zdfire@mean" );
00503 break;
00504 default:
00505 return NULL;
00506 }
00507 int N = threads->GetThreadingCount();
00508 float *scores = (float *)malloc(sizeof(float) * N );
00509 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00510 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00511 if(xpathCtx == NULL) {
00512 return NULL;
00513 }
00514
00515 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00516 if(xpathObj == NULL) {
00517 xmlXPathFreeContext(xpathCtx);
00518 return NULL;
00519 }
00520 int size = xpathObj->nodesetval->nodeNr;
00521 for(int i = 0; i < size; i++) {
00522 char * text;
00523
00524
00525
00526 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00527 scores[i] = atof( text );
00528 free(text);
00529 }
00530
00531 xmlXPathFreeObject(xpathObj);
00532 xmlXPathFreeContext(xpathCtx);
00533 return scores;
00534 }
00535
00536
00537
00538 float* ThreadFeatureParse::get_score_array_sd( ProspectOutput *threads, int feature ) {
00539 char searchpath[200] = "/prospectOutput/threading/score/z@sd";
00540 switch ( feature ) {
00541 case ScoreStruct::SCORE_RAW:
00542 strcpy( searchpath, "/prospectOutput/threading/score/raw@sd" );
00543 break;
00544 case ScoreStruct::SCORE_Z:
00545 strcpy( searchpath, "/prospectOutput/threading/score/z@sd" );
00546 break;
00547 case ScoreStruct::SCORE_ZFULL:
00548 strcpy( searchpath, "/prospectOutput/threading/score/zfull@sd" );
00549 break;
00550 case ScoreStruct::SCORE_NN:
00551 strcpy( searchpath, "/prospectOutput/threading/score/nn@sd" );
00552 break;
00553 case ScoreStruct::SCORE_ZMUTATIONLOG:
00554 strcpy( searchpath, "/prospectOutput/threading/score/zmutationlog@sd" );
00555 break;
00556 case ScoreStruct::SCORE_ZSINGLETON:
00557 strcpy( searchpath, "/prospectOutput/threading/score/zsingleton@sd" );
00558 break;
00559 case ScoreStruct::SCORE_ZSEC_STRUCT:
00560 strcpy( searchpath, "/prospectOutput/threading/score/zsec_struct@sd" );
00561 break;
00562 case ScoreStruct::SCORE_ZTWOBODY:
00563 strcpy( searchpath, "/prospectOutput/threading/score/ztwobody@sd" );
00564 break;
00565 case ScoreStruct::SCORE_ZDFIRE:
00566 strcpy( searchpath, "/prospectOutput/threading/score/zdfire@sd" );
00567 break;
00568 default:
00569 return NULL;
00570 }
00571 int N = threads->GetThreadingCount();
00572 float *scores = (float *)malloc(sizeof(float) * N );
00573 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00574 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00575 if(xpathCtx == NULL) {
00576 return NULL;
00577 }
00578
00579 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00580 if(xpathObj == NULL) {
00581 xmlXPathFreeContext(xpathCtx);
00582 return NULL;
00583 }
00584 int size = xpathObj->nodesetval->nodeNr;
00585 for(int i = 0; i < size; i++) {
00586 char * text;
00587
00588
00589
00590 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00591 scores[i] = atof( text );
00592 free(text);
00593 }
00594
00595 xmlXPathFreeObject(xpathObj);
00596 xmlXPathFreeContext(xpathCtx);
00597 return scores;
00598 }
00599
00600
00601 float* ThreadFeatureParse::get_weight_array( ProspectOutput *threads, int feature ) {
00602 char searchpath[200];
00603 sprintf( searchpath, "/prospectOutput/threading/weights/%s", e_names[ feature ] );
00604 int N = threads->GetThreadingCount();
00605 float *scores = (float *)malloc(sizeof(float) * N );
00606 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00607 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00608 if(xpathCtx == NULL) {
00609 return NULL;
00610 }
00611
00612 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00613 if(xpathObj == NULL) {
00614 xmlXPathFreeContext(xpathCtx);
00615 return NULL;
00616 }
00617 int size = xpathObj->nodesetval->nodeNr;
00618 for(int i = 0; i < size; i++) {
00619 char * text;
00620
00621
00622
00623 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00624 scores[i] = atof( text );
00625 free(text);
00626 }
00627
00628 xmlXPathFreeObject(xpathObj);
00629 xmlXPathFreeContext(xpathCtx);
00630 return scores;
00631 }
00632
00633 float* ThreadFeatureParse::get_energy_array( ProspectOutput *threads, int feature ) {
00634 char searchpath[200];
00635 sprintf( searchpath, "/prospectOutput/threading/energy/%s", e_names[ feature ] );
00636 int N = threads->GetThreadingCount();
00637 float *scores = (float *)malloc(sizeof(float) * N );
00638 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00639 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00640 if(xpathCtx == NULL) {
00641 return NULL;
00642 }
00643
00644 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00645 if(xpathObj == NULL) {
00646 xmlXPathFreeContext(xpathCtx);
00647 return NULL;
00648 }
00649 int size = xpathObj->nodesetval->nodeNr;
00650 for(int i = 0; i < size; i++) {
00651 char * text;
00652
00653
00654
00655 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00656 scores[i] = atof( text );
00657 free(text);
00658 }
00659
00660 xmlXPathFreeObject(xpathObj);
00661 xmlXPathFreeContext(xpathCtx);
00662 return scores;
00663 }
00664
00665 float* ThreadFeatureParse::get_energy_weight_array( ProspectOutput *threads, int feature ) {
00666 float *tmp1 = get_energy_array( threads, feature );
00667 float *tmp2 = get_weight_array( threads, feature );
00668 int N = threads->GetThreadingCount();
00669 for (int i = 0; i < N; i++) {
00670 tmp1[i] *= tmp2[i];
00671 }
00672 free(tmp2);
00673 return tmp1;
00674 }
00675
00676
00677
00678 float* ThreadFeatureParse::get_align_feature_array( ProspectOutput *threads, int feature ) {
00679 char searchpath[200];
00680 sprintf( searchpath, "/prospectOutput/threading/features/%s", AlignFeatures::GetName( feature ) );
00681 int N = threads->GetThreadingCount();
00682 float *scores = (float *)malloc(sizeof(float) * N );
00683 xmlDocPtr doc = (xmlDocPtr) threads->output_xml_handle;
00684 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
00685 if(xpathCtx == NULL) {
00686 return NULL;
00687 }
00688
00689 xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( (const xmlChar*)searchpath, xpathCtx);
00690 if(xpathObj == NULL) {
00691 xmlXPathFreeContext(xpathCtx);
00692 return NULL;
00693 }
00694 int size = xpathObj->nodesetval->nodeNr;
00695 for(int i = 0; i < size; i++) {
00696 char * text;
00697
00698
00699
00700 text = (char *)xmlNodeGetContent(xpathObj->nodesetval->nodeTab[ i ]);
00701 scores[i] = atof( text );
00702 free(text);
00703 }
00704
00705 xmlXPathFreeObject(xpathObj);
00706 xmlXPathFreeContext(xpathCtx);
00707 return scores;
00708 }
00709
00710
00711
00712 void append_to_str(char **str1, char *str2);
00713
00714 char *ThreadFeatureParse::ParseFeatureStr( ProspectThreadingInfo *thread_info, char *format_str ) {
00715
00716 char *out_str = NULL;
00717
00718 for (int j = 0; j < strlen(format_str); j++) {
00719 if (format_str[j] == '%') {
00720 j++;
00721 int k = j;
00722 while ( format_str[k] != '\n' &&
00723 format_str[k] != ' ' &&
00724 format_str[k] != '\t' &&
00725 format_str[k] != '\\' &&
00726 format_str[k] != '%' &&
00727 format_str[k] != 0)
00728 k++;
00729 char format_word[201];
00730 for (int l = 0; l < k-j && l < 200; l++) {
00731 format_word[l] = format_str[l+j];
00732 format_word[l+1] = 0;
00733 }
00734
00735 char *tmp = GetFeatureStr( thread_info, format_word);
00736 if ( tmp ) {
00737 append_to_str( &out_str, tmp );
00738 free( tmp );
00739 }
00740 j = k-1;
00741 } else if (format_str[j] == '\\') {
00742 j++;
00743 switch (format_str[j]) {
00744 case 't':
00745
00746 append_to_str( &out_str, "\t" );
00747 break;
00748 }
00749 } else {
00750 char buffer[10];
00751 sprintf(buffer, "%c", format_str[j]);
00752 append_to_str( &out_str, buffer );
00753 }
00754 }
00755 return out_str;
00756 }
00757
00758
00759 float ThreadFeatureParse::ParseFeatureEqn(ProspectThreadingInfo* thread, char* eqn_str) {
00760 ThreadFeatureParse* tmp_parse = ThreadFeatureParse::CompileParse( eqn_str );
00761 float tmp_float = tmp_parse->RunParse( thread );
00762 delete tmp_parse;
00763 return tmp_float;
00764 }
00765
00766
00767
00768
00769
00770 #include <list>
00771
00772 extern int mem_stack;
00773 extern std::list<feature_parse_op> op_stack;
00774
00775 extern int yy_scan_string(const char *);
00776 int yyparse (void);
00777
00778
00779
00780 ThreadFeatureParse* ThreadFeatureParse::CompileParse( char *eqn_str ) {
00781 char *buffer = strdup( eqn_str );
00782 mem_stack = 0;
00783 op_stack.empty();
00784 yy_scan_string (buffer);
00785 yyparse();
00786 int c = 0;
00787 feature_parse_op *ops_tmp = new feature_parse_op[ op_stack.size() ];
00788 for ( std::list<feature_parse_op>::iterator i = op_stack.begin(); i != op_stack.end(); ++i) {
00789 ops_tmp[c] = (*i);
00790 c++;
00791 }
00792 free( buffer );
00793 ThreadFeatureParse *out_parse = new ThreadFeatureParse( ops_tmp, c, mem_stack);
00794 mem_stack = 0;
00795 op_stack.empty();
00796 return out_parse;
00797 }
00798
00799
00800
00801 float ThreadFeatureParse::RunFeatureProgram( ProspectThreadingInfo *thread, float *feature_vals ) {
00802 if ( thread == NULL && feature_vals == NULL )
00803 return 0;
00804 for (int i = 0; i < op_count; i++) {
00805 switch (ops[i].op_code) {
00806 case FPOP_SET:
00807 #ifdef PRINT_FPOPS
00808 printf("SET %d\t%f\n", ops[i].mem_store, ops[i].op_arg.val );
00809 #endif
00810 mem[ ops[i].mem_store ] = ops[i].op_arg.val;
00811 break;
00812 case FPOP_GET:
00813 if ( thread ) {
00814 mem[ ops[i].mem_store ] = ThreadFeatureParse::GetFeatureFloat( thread, ops[i].op_arg.reg[0] );
00815 } else {
00816 mem[ ops[i].mem_store ] = feature_vals[ ops[i].op_arg.reg[0] ];
00817 }
00818 #ifdef PRINT_FPOPS
00819 printf("GET %d\t%d(%f)\n", ops[i].mem_store, ops[i].op_arg.reg[0], mem[ ops[i].mem_store ] );
00820 #endif
00821 break;
00822 case FPOP_ADD:
00823 mem[ ops[i].mem_store ] = mem[ ops[i].op_arg.reg[0] ] + mem[ ops[i].op_arg.reg[1] ];
00824 #ifdef PRINT_FPOPS
00825 printf("ADD %d\t%d\t%d\t(%f)\n", ops[i].mem_store, ops[i].op_arg.reg[0], ops[i].op_arg.reg[1], mem[ ops[i].mem_store ] );
00826 #endif
00827 break;
00828 case FPOP_SUB:
00829 mem[ ops[i].mem_store ] = mem[ ops[i].op_arg.reg[0] ] - mem[ ops[i].op_arg.reg[1] ];
00830 #ifdef PRINT_FPOPS
00831 printf("SUB %d\t%d\t%d\n", ops[i].mem_store, ops[i].op_arg.reg[0], ops[i].op_arg.reg[1] );
00832 #endif
00833 break;
00834 case FPOP_MUL:
00835 mem[ ops[i].mem_store ] = mem[ ops[i].op_arg.reg[0] ] * mem[ ops[i].op_arg.reg[1] ];
00836 #ifdef PRINT_FPOPS
00837 printf("MUL %d\t%d\t%d\n", ops[i].mem_store, ops[i].op_arg.reg[0], ops[i].op_arg.reg[1] );
00838 #endif
00839 break;
00840 case FPOP_DIV:
00841 mem[ ops[i].mem_store ] = mem[ ops[i].op_arg.reg[0] ] / mem[ ops[i].op_arg.reg[1] ];
00842 #ifdef PRINT_FPOPS
00843 printf("DIV %d\t%d\t%d\n", ops[i].mem_store, ops[i].op_arg.reg[0], ops[i].op_arg.reg[1] );
00844 #endif
00845 break;
00846 case FPOP_POW:
00847 mem[ ops[i].mem_store ] = pow( mem[ ops[i].op_arg.reg[0] ], mem[ ops[i].op_arg.reg[1] ] );
00848 #ifdef PRINT_FPOPS
00849 printf("POW %d\t%d\t%d\n", ops[i].mem_store, ops[i].op_arg.reg[0], ops[i].op_arg.reg[1] );
00850 #endif
00851 break;
00852 case FPOP_SRT:
00853 mem[ ops[i].mem_store ] = sqrt( mem[ ops[i].op_arg.reg[0] ] );
00854 break;
00855 case FPOP_LOG:
00856 mem[ ops[i].mem_store ] = log( mem[ ops[i].op_arg.reg[0] ] );
00857 break;
00858 case FPOP_SIN:
00859 mem[ ops[i].mem_store ] = sin( mem[ ops[i].op_arg.reg[0] ] );
00860 break;
00861 case FPOP_COS:
00862 mem[ ops[i].mem_store ] = cos( mem[ ops[i].op_arg.reg[0] ] );
00863 break;
00864 case FPOP_NEG:
00865 mem[ ops[i].mem_store ] = -(mem[ ops[i].op_arg.reg[0] ]);
00866 #ifdef PRINT_FPOPS
00867 printf("NEG %d\t%d\n", ops[i].mem_store, ops[i].op_arg.reg[0] );
00868 #endif
00869 break;
00870 case FPOP_ABS:
00871 mem[ ops[i].mem_store ] = fabs( mem[ ops[i].op_arg.reg[0] ] );
00872 break;
00873 case FPOP_RES:
00874 #ifdef PRINT_FPOPS
00875 printf("RES\t%d\t%f\n", ops[i].op_arg.reg[0], mem[ ops[i].op_arg.reg[0] ] );
00876 #endif
00877 return mem[ ops[i].op_arg.reg[0] ] ;
00878 break;
00879 }
00880 }
00881 return 0;
00882
00883
00884 }
00885
00886
00887 float ThreadFeatureParse::RunParse( ProspectThreadingInfo *thread ) {
00888 return RunFeatureProgram( thread );
00889 }
00890
00891
00892 float *ThreadFeatureParse::RunParse( ProspectOutput *threads ) {
00893 int N = threads->GetThreadingCount();
00894 int feature_id_count = (sizeof(feature_array) / sizeof(feature_array[0]) );
00895 float **feature_val = (float **)malloc( sizeof(float *) * feature_id_count );
00896 memset( feature_val, 0, sizeof(float *) * feature_id_count);
00897
00898 for (int i = 0; i < op_count; i++) {
00899 if ( ops[i].op_code == FPOP_GET ) {
00900 int feature_id = ops[i].op_arg.reg[0];
00901 if ( feature_array[feature_id].extract_array_func != NULL ) {
00902 feature_val[ feature_id ] = feature_array[feature_id].extract_array_func( threads, feature_array[feature_id].extract_arg );
00903 }
00904 }
00905 }
00906 float *array = new float[ N ];
00907 float tmp_array[ feature_id_count ];
00908 for (int i = 0; i < N; i++) {
00909 for (int j = 0; j < feature_id_count; j++) {
00910 if ( feature_val[j] != NULL )
00911 tmp_array[j] = feature_val[j][i];
00912 else
00913 tmp_array[j] = 0;
00914 }
00915 array[i] = RunFeatureProgram( NULL, tmp_array );
00916 }
00917 for (int j = 0; j < feature_id_count; j++) {
00918 if ( feature_val[j] != NULL )
00919 free( feature_val[j] );
00920 }
00921 free(feature_val);
00922 return array;
00923 }