src/common/score_functions.cc

00001 
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <assert.h>
00005 #include <string.h>
00006 #include <math.h>
00007 
00008 #include <pthread.h>
00009 
00010 
00011 #include "open_prospect.h"
00012 
00013 #include "../dynamic_threading/dynamic_threading.h"
00014 
00015 
00016 
00017 int count_nalign(  TargetStruct *target_data, AlignmentStruct *alignment ) {
00018         int nalign = 0;
00019         for (int i = 0; i < target_data->len; i++) {
00020                 if (alignment->GetTargetAlign(i) != -1)
00021                         nalign++;
00022         }
00023         return nalign;  
00024 }
00025 
00026 
00027 
00028 ScoreStruct::ScoreStruct(void ) {
00029         mask = 0x00000000;      
00030 }
00031 
00032 ScoreStruct::~ScoreStruct(void ) {
00033                 
00034 }
00035 
00036 
00037 
00038 char score_names[ScoreStruct::SCORE_COUNT_TOTAL][15] = {
00039         "raw",
00040         "z",
00041         "zfull",
00042         "nn",
00043         "user0",
00044         "user1",
00045         "user2",
00046         "user3",
00047         "zmutationlog",
00048         "zsingleton",
00049         "zsec_struct",
00050         "ztwobody",
00051         "zdfire"
00052 };
00053 
00054 
00055 char *ScoreStruct::GetScoreName(int num) {
00056         return score_names[num];        
00057 }
00058 
00059 
00060 
00061 
00062 pValType ScoreStruct::ScoreRaw( ProspectParam *param,
00063                                                           WeightArray weight,            
00064                                                           TemplateStruct *template_data,
00065                                                           TargetStruct *target_data, 
00066                                                           AlignmentStruct *alignment) {
00067         EnergyArray energy;
00068         alignment->GetEnergies(param, template_data, target_data, energy);      
00069         pValType val = 0;
00070         for (int i = 0; i< e_count; i++) {
00071                 val += weight[i] * energy[i];
00072         }       
00073         this->values[ SCORE_RAW ] = val;
00074         this->mask |= (0x01 << SCORE_RAW);
00075         return val;
00076 }
00077 
00078 //--------------------------------------------------------------------
00079 void shuffle_seq(TargetStruct *src, TargetStruct *dst) {
00080         char seq_mask[ src->len ];
00081         memset( seq_mask, 0x00, src->len );     
00082         for (int i=0; i < src->len;i++) {
00083                 int j;
00084                 do {
00085                         j = ((int)(((pValType)src->len * ((pValType)rand()) / ((pValType)RAND_MAX)))) % src->len;
00086                 } while ( seq_mask[j] );        
00087 #if 1
00088                 dst->residue[i] = src->residue[j];
00089 #else
00090                 dst->residue[i].RS = src->residue[j].RS;
00091                 memcpy( dst->residue[i].FREQ, src->residue[j].FREQ, sizeof(pValType) * 20 );    
00092 #endif
00093                 dst->residue[i].orig_num = j;
00094                 seq_mask[j] = 1;
00095         }
00096 }
00097 
00098 typedef struct  {
00099         ProspectParam *param;
00100         WeightArray weight; 
00101         TemplateStruct *template_data;
00102         TargetStruct *target_data;
00103         EnergyArray *energy_array;
00104         AlignmentStruct *original_align;
00105         int step_start;
00106         int step_size;  
00107         bool full;
00108         long score_mask;
00109 } z_loop_calc_data;
00110 
00111 
00112 void *z_loop_calc_wrapper(void *arg) {
00113         z_loop_calc_data *data = (z_loop_calc_data*)arg;
00114         if ( data->full ) {
00115                 TargetStruct tmp_target;                                                 
00116                 tmp_target.len = data->target_data->len;
00117                 tmp_target.distconst_count = 0;
00118                 tmp_target.twobody = data->target_data->twobody; //this is a hack, original twobody table
00119                 tmp_target.residue = (TargetResidue *)malloc(sizeof(TargetResidue) * data->target_data->len );
00120                 for (int i = 0; i < data->target_data->len; i++) {
00121                         tmp_target.residue[i] = data->target_data->residue[i];
00122                 }       
00123                 for (int i = 0 + data->step_start; i < data->param->z_cycles; i += data->step_size) {
00124                         AlignmentStruct  new_alignment;
00125                         shuffle_seq(data->target_data, &tmp_target );
00126                         char *status = dynamic_thread(data->param, data->weight, data->template_data, &tmp_target, &new_alignment );
00127                         new_alignment.GetEnergies(data->param, data->template_data, &tmp_target, data->energy_array[i]);
00128                 }       
00129                 tmp_target.twobody = NULL; //undo the hack, so the twobody table isn't delallocated
00130         } else {
00131                 TargetStruct tmp_target;
00132                 tmp_target.len = data->target_data->len;
00133                 tmp_target.twobody = data->target_data->twobody;        
00134                 tmp_target.residue = (TargetResidue *)malloc(sizeof(TargetResidue) * data->target_data->len );  
00135                 for (int i = 0; i < data->target_data->len; i++) {
00136                         tmp_target.residue[i] = data->target_data->residue[i];
00137                 }
00138                 for (int i = 0 + data->step_start; i < data->param->z_cycles; i += data->step_size) {
00139                         shuffle_seq(data->target_data, &tmp_target );
00140                         data->original_align->GetEnergies(data->param, data->template_data, &tmp_target, data->energy_array[i],  data->score_mask);
00141                 }       
00142                 tmp_target.twobody = NULL; //undo the hack, so the twobody table isn't delallocated
00143         }       
00144         if ( data->step_size > 1 )
00145                 pthread_exit(NULL);
00146         return NULL;
00147 }
00148 
00149 
00150 
00151 pValType energy_array_stat( ProspectParam *param, WeightArray weight, 
00152                                                         TemplateStruct *template_data, TargetStruct *target_data,
00153                                                         AlignmentStruct *original_align,
00154                                                         EnergyArray *energy_array, ScoreStruct *scores, 
00155                                                         bool full = false, long score_mask = 0xFFFFFFFF ) {             
00156         double raw_thread, total, variance, sd, zscore, mean, raw_original;
00157         EnergyArray energy_total, energy_mean, energy_sd, energy_z, energy_original;
00158 
00159         
00160         original_align->GetEnergies(param, template_data, target_data, energy_original, score_mask);
00161         raw_original = 0;
00162         for (long i = 0; i < e_count; i++) {
00163                 if ( e_types[i] != et_gap )
00164                         raw_original += (double)weight[i] * (double)energy_original[i];
00165         }
00166         
00167         double *raw_scores = (double *)malloc(sizeof(double) * param->z_cycles);
00168         for (long i = 0; i < param->z_cycles; i++) {
00169                 raw_scores[i] = 0;
00170                 for (long j = 0; j < e_count; j++) 
00171                         raw_scores[i] += (double)weight[j] * (double)energy_array[i][j];
00172         }
00173         
00174         total = 0;
00175         for (int i = 0; i < param->z_cycles; i++) {
00176                 total += raw_scores[i];
00177         }
00178         mean = total / (pValType)param->z_cycles;
00179         total = 0;
00180         for (int i = 0; i < param->z_cycles; i++) {
00181                 total += pow( raw_scores[i] - mean, 2);
00182         }
00183         variance = total / (pValType)param->z_cycles;
00184         sd = sqrt(variance);
00185         if ( sd == 0 )
00186                 sd = 1;
00187         zscore = (mean - raw_thread)/sd;
00188         
00189         
00190         //total energy zscore   
00191         total = 0;
00192         for (long i = 0; i < param->z_cycles; i++) {
00193                 total += (double)raw_scores[i];
00194         }
00195         mean = total / (double)param->z_cycles;
00196         total = 0;
00197         for (long i = 0; i < param->z_cycles; i++) {
00198                 total += pow( raw_scores[i] - mean, 2);
00199         }
00200         variance = total / (double)param->z_cycles;
00201         sd = sqrt(variance);
00202         
00203         zscore = (mean - raw_original)/sd;      
00204         
00205         if (param->v_level > 0) {
00206                 printf("raw: %lf  mean: %lf  sd: %lf  zscore: %lf\n", 
00207                            raw_thread, mean, sd, zscore );
00208         }       
00209         //      assert( !isnan(zscore) );
00210         if ( isnan(zscore) ) {
00211                 zscore = 0;
00212         }
00213         free(raw_scores);               
00214         if ( full ) {
00215                 scores->values[ ScoreStruct::SCORE_ZFULL ] = zscore;
00216                 scores->z_sd[ ScoreStruct::SCORE_ZFULL ] = sd; scores->z_mean[ ScoreStruct::SCORE_ZFULL ] = mean; 
00217         } else {
00218                 scores->values[ ScoreStruct::SCORE_Z ] = zscore;
00219                 scores->z_sd[ ScoreStruct::SCORE_Z ] = sd; scores->z_mean[ ScoreStruct::SCORE_Z ] = mean; 
00220         }
00221         
00222         
00223         for (int j = 0; j < e_count; j++) {
00224                 energy_total[j] = 0;
00225         }
00226         
00227         for (long i = 0; i < param->z_cycles; i++) {
00228                 for (int j = 0; j < e_count; j++) {
00229                         energy_total[j] += energy_array[i][j];
00230                 }
00231         }
00232         for (int i = 0; i < e_count; i++) {
00233                 energy_mean[i] = energy_total[i] / (pValType)  param->z_cycles;
00234         }
00235         for (int j = 0; j < e_count; j++) {
00236                 energy_total[j] = 0;
00237         }
00238         for (long i = 0; i < param->z_cycles; i++) {
00239                 for (int j = 0; j < e_count; j++) {
00240                         energy_total[j] += pow(energy_array[i][j] - energy_mean[j], 2);
00241                         //      assert( !isnan(energy_total[j]) );
00242                 }
00243         }
00244         for (int i = 0; i < e_count; i++) {
00245                 energy_sd[i] = sqrt( energy_total[i] / (pValType)  param->z_cycles );
00246         }
00247         
00248         for (int i = 0; i < e_count; i++) {
00249                 energy_z[i] = (energy_mean[i] - energy_original[i]) / energy_sd[i];
00250         }
00251         
00252         scores->values[ ScoreStruct::SCORE_ZMUTATIONLOG ] = energy_z[ e_mutationlog ];
00253         scores->z_sd[ ScoreStruct::SCORE_ZMUTATIONLOG ] = energy_sd[ e_mutationlog ]; scores->z_mean[ ScoreStruct::SCORE_ZMUTATIONLOG ] = energy_mean[ e_mutationlog ]; 
00254         scores->values[ ScoreStruct::SCORE_ZSINGLETON ] = energy_z[ e_singleton ];
00255         scores->z_sd[ ScoreStruct::SCORE_ZSINGLETON ] = energy_sd[ e_singleton ]; scores->z_mean[ ScoreStruct::SCORE_ZSINGLETON ] = energy_mean[ e_singleton ]; 
00256         scores->values[ ScoreStruct::SCORE_ZSEC_STRUCT ] = energy_z[ e_sec_struct ];
00257         scores->z_sd[ ScoreStruct::SCORE_ZSEC_STRUCT ] = energy_sd[ e_sec_struct ]; scores->z_mean[ ScoreStruct::SCORE_ZSEC_STRUCT ] = energy_mean[ e_sec_struct ]; 
00258         scores->values[ ScoreStruct::SCORE_ZTWOBODY ] = energy_z[ e_twobody ];
00259         scores->z_sd[ ScoreStruct::SCORE_ZTWOBODY ] = energy_sd[ e_twobody ]; scores->z_mean[ ScoreStruct::SCORE_ZTWOBODY ] = energy_mean[ e_twobody ]; 
00260         scores->values[ ScoreStruct::SCORE_ZDFIRE ] = energy_z[ e_dfire ];
00261         scores->z_sd[ ScoreStruct::SCORE_ZDFIRE ] = energy_sd[ e_dfire ]; scores->z_mean[ ScoreStruct::SCORE_ZDFIRE ] = energy_mean[ e_dfire ]; 
00262         
00263         for (int i = ScoreStruct::SCORE_ZENERGY_START; i < ScoreStruct::SCORE_ZENERGY_END; i++) {
00264                 if ( isnan( scores->values[i] ) ) {
00265                         scores->values[i] = 0.0;
00266                 }
00267         }               
00268         return zscore;
00269 }
00270 
00271 
00272 
00273 pValType ScoreStruct::ScoreZ(ProspectParam *param,
00274                                                  WeightArray weight,             
00275                                                  TemplateStruct *template_data, 
00276                                                  TargetStruct *target_data,
00277                                                  AlignmentStruct *original_align) {
00278         long score_mask = (e_mask( e_mutationlog ) | e_mask( e_singleton ) | e_mask( e_sec_struct ) | e_mask( e_twobody ) | e_mask( e_dfire ) );
00279         EnergyArray  *energy_array = (EnergyArray *)malloc(sizeof(EnergyArray) * param->z_cycles);
00280         z_loop_calc_data *zloop_data = (z_loop_calc_data *)malloc(sizeof(z_loop_calc_data) * param->ncpus );
00281         
00282         for (int i = 0; i < param->ncpus; i++) {        
00283                 for (int j= 0; j < e_count; j++)
00284                         zloop_data[i].weight[j] = weight[j];
00285                 zloop_data[i].param = param;
00286                 zloop_data[i].template_data = template_data; 
00287                 zloop_data[i].target_data = target_data;
00288                 zloop_data[i].energy_array = energy_array;
00289                 zloop_data[i].step_size = param->ncpus;                         
00290                 zloop_data[i].original_align = original_align;
00291                 zloop_data[i].step_start = i;
00292                 zloop_data[i].full = false;
00293                 zloop_data[i].score_mask = score_mask;
00294         }       
00295         if ( param->ncpus > 1 ) {               
00296                 pthread_t *threads;
00297                 threads = (pthread_t *)malloc(sizeof(pthread_t) * param->ncpus);
00298                 for (int i = 0; i < param->ncpus; i++) {        
00299                         pthread_create(&threads[i], NULL, z_loop_calc_wrapper, &zloop_data[i]);
00300                 }
00301                 for (int i = 0; i < param->ncpus; i++) {        
00302                         pthread_join(threads[i], NULL);
00303                 }
00304                 free(threads);
00305                 free(zloop_data);               
00306         } else {
00307                 z_loop_calc_wrapper( zloop_data );
00308         }       
00309         pValType zscore = energy_array_stat( param, weight, 
00310                                                                                  template_data, target_data,
00311                                                                                  original_align,
00312                                                                                  energy_array, this, 
00313                                                                                  false, score_mask );
00314         free( energy_array );   
00315         this->values[ SCORE_Z ] = zscore;
00316         this->mask |= (0x01 << SCORE_Z) ;
00317 
00318         return zscore;
00319 }
00320 
00321 
00322 pValType ScoreStruct::ScoreZFull(ProspectParam *param,
00323                                                                  WeightArray weight,             
00324                                                                  TemplateStruct *template_data, 
00325                                                                  TargetStruct *target_data,
00326                                                                  AlignmentStruct *original_align) {
00327         EnergyArray  *energy_array = (EnergyArray *)malloc(sizeof(EnergyArray) * param->z_cycles);
00328         z_loop_calc_data *zloop_data = (z_loop_calc_data *)malloc(sizeof(z_loop_calc_data) * param->ncpus );
00329         for (int i = 0; i < param->ncpus; i++) {        
00330                 for (int j= 0; j < e_count; j++)
00331                         zloop_data[i].weight[j] = weight[j];
00332                 zloop_data[i].param = param;
00333                 zloop_data[i].template_data = template_data; 
00334                 zloop_data[i].target_data = target_data;
00335                 zloop_data[i].energy_array = energy_array;
00336                 zloop_data[i].step_size = param->ncpus;                 
00337                 zloop_data[i].original_align = original_align;
00338                 zloop_data[i].step_start = i;
00339                 zloop_data[i].full = true;              
00340                 zloop_data[i].score_mask = 0xFFFFFFFF;
00341         }
00342         
00343         if ( param->ncpus > 1 ) {               
00344                 pthread_t *threads;
00345                 threads = (pthread_t *)malloc(sizeof(pthread_t) * param->ncpus);
00346                 for (int i = 0; i < param->ncpus; i++) {        
00347                         pthread_create(&threads[i], NULL, z_loop_calc_wrapper, &zloop_data[i]);
00348                 }
00349                 for (int i = 0; i < param->ncpus; i++) {        
00350                         pthread_join(threads[i], NULL);
00351                 }
00352                 free(threads);
00353                 free(zloop_data);               
00354         } else {
00355                 z_loop_calc_wrapper( zloop_data );
00356         }
00357         pValType zscore = energy_array_stat( param, weight, 
00358                                                                                  template_data, target_data,
00359                                                                                  original_align,
00360                                                                                  energy_array, this, 
00361                                                                                  true, 0xFFFFFFFF );
00362         free( energy_array );   
00363         this->values[ SCORE_ZFULL ] = zscore;
00364         this->mask |= (0x01 << SCORE_ZFULL) ;
00365 
00366         return zscore;
00367 }
00368 
00369 
00370 

Generated on Wed Apr 11 16:50:50 2007 for open_prospect by  doxygen 1.4.6