00001
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <assert.h>
00005 #include <string.h>
00006 #include <math.h>
00007
00008 #include <pthread.h>
00009
00010
00011 #include "open_prospect.h"
00012
00013 #include "../dynamic_threading/dynamic_threading.h"
00014
00015
00016
00017 int count_nalign( TargetStruct *target_data, AlignmentStruct *alignment ) {
00018 int nalign = 0;
00019 for (int i = 0; i < target_data->len; i++) {
00020 if (alignment->GetTargetAlign(i) != -1)
00021 nalign++;
00022 }
00023 return nalign;
00024 }
00025
00026
00027
00028 ScoreStruct::ScoreStruct(void ) {
00029 mask = 0x00000000;
00030 }
00031
00032 ScoreStruct::~ScoreStruct(void ) {
00033
00034 }
00035
00036
00037
00038 char score_names[ScoreStruct::SCORE_COUNT_TOTAL][15] = {
00039 "raw",
00040 "z",
00041 "zfull",
00042 "nn",
00043 "user0",
00044 "user1",
00045 "user2",
00046 "user3",
00047 "zmutationlog",
00048 "zsingleton",
00049 "zsec_struct",
00050 "ztwobody",
00051 "zdfire"
00052 };
00053
00054
00055 char *ScoreStruct::GetScoreName(int num) {
00056 return score_names[num];
00057 }
00058
00059
00060
00061
00062 pValType ScoreStruct::ScoreRaw( ProspectParam *param,
00063 WeightArray weight,
00064 TemplateStruct *template_data,
00065 TargetStruct *target_data,
00066 AlignmentStruct *alignment) {
00067 EnergyArray energy;
00068 alignment->GetEnergies(param, template_data, target_data, energy);
00069 pValType val = 0;
00070 for (int i = 0; i< e_count; i++) {
00071 val += weight[i] * energy[i];
00072 }
00073 this->values[ SCORE_RAW ] = val;
00074 this->mask |= (0x01 << SCORE_RAW);
00075 return val;
00076 }
00077
00078
00079 void shuffle_seq(TargetStruct *src, TargetStruct *dst) {
00080 char seq_mask[ src->len ];
00081 memset( seq_mask, 0x00, src->len );
00082 for (int i=0; i < src->len;i++) {
00083 int j;
00084 do {
00085 j = ((int)(((pValType)src->len * ((pValType)rand()) / ((pValType)RAND_MAX)))) % src->len;
00086 } while ( seq_mask[j] );
00087 #if 1
00088 dst->residue[i] = src->residue[j];
00089 #else
00090 dst->residue[i].RS = src->residue[j].RS;
00091 memcpy( dst->residue[i].FREQ, src->residue[j].FREQ, sizeof(pValType) * 20 );
00092 #endif
00093 dst->residue[i].orig_num = j;
00094 seq_mask[j] = 1;
00095 }
00096 }
00097
00098 typedef struct {
00099 ProspectParam *param;
00100 WeightArray weight;
00101 TemplateStruct *template_data;
00102 TargetStruct *target_data;
00103 EnergyArray *energy_array;
00104 AlignmentStruct *original_align;
00105 int step_start;
00106 int step_size;
00107 bool full;
00108 long score_mask;
00109 } z_loop_calc_data;
00110
00111
00112 void *z_loop_calc_wrapper(void *arg) {
00113 z_loop_calc_data *data = (z_loop_calc_data*)arg;
00114 if ( data->full ) {
00115 TargetStruct tmp_target;
00116 tmp_target.len = data->target_data->len;
00117 tmp_target.distconst_count = 0;
00118 tmp_target.twobody = data->target_data->twobody;
00119 tmp_target.residue = (TargetResidue *)malloc(sizeof(TargetResidue) * data->target_data->len );
00120 for (int i = 0; i < data->target_data->len; i++) {
00121 tmp_target.residue[i] = data->target_data->residue[i];
00122 }
00123 for (int i = 0 + data->step_start; i < data->param->z_cycles; i += data->step_size) {
00124 AlignmentStruct new_alignment;
00125 shuffle_seq(data->target_data, &tmp_target );
00126 char *status = dynamic_thread(data->param, data->weight, data->template_data, &tmp_target, &new_alignment );
00127 new_alignment.GetEnergies(data->param, data->template_data, &tmp_target, data->energy_array[i]);
00128 }
00129 tmp_target.twobody = NULL;
00130 } else {
00131 TargetStruct tmp_target;
00132 tmp_target.len = data->target_data->len;
00133 tmp_target.twobody = data->target_data->twobody;
00134 tmp_target.residue = (TargetResidue *)malloc(sizeof(TargetResidue) * data->target_data->len );
00135 for (int i = 0; i < data->target_data->len; i++) {
00136 tmp_target.residue[i] = data->target_data->residue[i];
00137 }
00138 for (int i = 0 + data->step_start; i < data->param->z_cycles; i += data->step_size) {
00139 shuffle_seq(data->target_data, &tmp_target );
00140 data->original_align->GetEnergies(data->param, data->template_data, &tmp_target, data->energy_array[i], data->score_mask);
00141 }
00142 tmp_target.twobody = NULL;
00143 }
00144 if ( data->step_size > 1 )
00145 pthread_exit(NULL);
00146 return NULL;
00147 }
00148
00149
00150
00151 pValType energy_array_stat( ProspectParam *param, WeightArray weight,
00152 TemplateStruct *template_data, TargetStruct *target_data,
00153 AlignmentStruct *original_align,
00154 EnergyArray *energy_array, ScoreStruct *scores,
00155 bool full = false, long score_mask = 0xFFFFFFFF ) {
00156 double raw_thread, total, variance, sd, zscore, mean, raw_original;
00157 EnergyArray energy_total, energy_mean, energy_sd, energy_z, energy_original;
00158
00159
00160 original_align->GetEnergies(param, template_data, target_data, energy_original, score_mask);
00161 raw_original = 0;
00162 for (long i = 0; i < e_count; i++) {
00163 if ( e_types[i] != et_gap )
00164 raw_original += (double)weight[i] * (double)energy_original[i];
00165 }
00166
00167 double *raw_scores = (double *)malloc(sizeof(double) * param->z_cycles);
00168 for (long i = 0; i < param->z_cycles; i++) {
00169 raw_scores[i] = 0;
00170 for (long j = 0; j < e_count; j++)
00171 raw_scores[i] += (double)weight[j] * (double)energy_array[i][j];
00172 }
00173
00174 total = 0;
00175 for (int i = 0; i < param->z_cycles; i++) {
00176 total += raw_scores[i];
00177 }
00178 mean = total / (pValType)param->z_cycles;
00179 total = 0;
00180 for (int i = 0; i < param->z_cycles; i++) {
00181 total += pow( raw_scores[i] - mean, 2);
00182 }
00183 variance = total / (pValType)param->z_cycles;
00184 sd = sqrt(variance);
00185 if ( sd == 0 )
00186 sd = 1;
00187 zscore = (mean - raw_thread)/sd;
00188
00189
00190
00191 total = 0;
00192 for (long i = 0; i < param->z_cycles; i++) {
00193 total += (double)raw_scores[i];
00194 }
00195 mean = total / (double)param->z_cycles;
00196 total = 0;
00197 for (long i = 0; i < param->z_cycles; i++) {
00198 total += pow( raw_scores[i] - mean, 2);
00199 }
00200 variance = total / (double)param->z_cycles;
00201 sd = sqrt(variance);
00202
00203 zscore = (mean - raw_original)/sd;
00204
00205 if (param->v_level > 0) {
00206 printf("raw: %lf mean: %lf sd: %lf zscore: %lf\n",
00207 raw_thread, mean, sd, zscore );
00208 }
00209
00210 if ( isnan(zscore) ) {
00211 zscore = 0;
00212 }
00213 free(raw_scores);
00214 if ( full ) {
00215 scores->values[ ScoreStruct::SCORE_ZFULL ] = zscore;
00216 scores->z_sd[ ScoreStruct::SCORE_ZFULL ] = sd; scores->z_mean[ ScoreStruct::SCORE_ZFULL ] = mean;
00217 } else {
00218 scores->values[ ScoreStruct::SCORE_Z ] = zscore;
00219 scores->z_sd[ ScoreStruct::SCORE_Z ] = sd; scores->z_mean[ ScoreStruct::SCORE_Z ] = mean;
00220 }
00221
00222
00223 for (int j = 0; j < e_count; j++) {
00224 energy_total[j] = 0;
00225 }
00226
00227 for (long i = 0; i < param->z_cycles; i++) {
00228 for (int j = 0; j < e_count; j++) {
00229 energy_total[j] += energy_array[i][j];
00230 }
00231 }
00232 for (int i = 0; i < e_count; i++) {
00233 energy_mean[i] = energy_total[i] / (pValType) param->z_cycles;
00234 }
00235 for (int j = 0; j < e_count; j++) {
00236 energy_total[j] = 0;
00237 }
00238 for (long i = 0; i < param->z_cycles; i++) {
00239 for (int j = 0; j < e_count; j++) {
00240 energy_total[j] += pow(energy_array[i][j] - energy_mean[j], 2);
00241
00242 }
00243 }
00244 for (int i = 0; i < e_count; i++) {
00245 energy_sd[i] = sqrt( energy_total[i] / (pValType) param->z_cycles );
00246 }
00247
00248 for (int i = 0; i < e_count; i++) {
00249 energy_z[i] = (energy_mean[i] - energy_original[i]) / energy_sd[i];
00250 }
00251
00252 scores->values[ ScoreStruct::SCORE_ZMUTATIONLOG ] = energy_z[ e_mutationlog ];
00253 scores->z_sd[ ScoreStruct::SCORE_ZMUTATIONLOG ] = energy_sd[ e_mutationlog ]; scores->z_mean[ ScoreStruct::SCORE_ZMUTATIONLOG ] = energy_mean[ e_mutationlog ];
00254 scores->values[ ScoreStruct::SCORE_ZSINGLETON ] = energy_z[ e_singleton ];
00255 scores->z_sd[ ScoreStruct::SCORE_ZSINGLETON ] = energy_sd[ e_singleton ]; scores->z_mean[ ScoreStruct::SCORE_ZSINGLETON ] = energy_mean[ e_singleton ];
00256 scores->values[ ScoreStruct::SCORE_ZSEC_STRUCT ] = energy_z[ e_sec_struct ];
00257 scores->z_sd[ ScoreStruct::SCORE_ZSEC_STRUCT ] = energy_sd[ e_sec_struct ]; scores->z_mean[ ScoreStruct::SCORE_ZSEC_STRUCT ] = energy_mean[ e_sec_struct ];
00258 scores->values[ ScoreStruct::SCORE_ZTWOBODY ] = energy_z[ e_twobody ];
00259 scores->z_sd[ ScoreStruct::SCORE_ZTWOBODY ] = energy_sd[ e_twobody ]; scores->z_mean[ ScoreStruct::SCORE_ZTWOBODY ] = energy_mean[ e_twobody ];
00260 scores->values[ ScoreStruct::SCORE_ZDFIRE ] = energy_z[ e_dfire ];
00261 scores->z_sd[ ScoreStruct::SCORE_ZDFIRE ] = energy_sd[ e_dfire ]; scores->z_mean[ ScoreStruct::SCORE_ZDFIRE ] = energy_mean[ e_dfire ];
00262
00263 for (int i = ScoreStruct::SCORE_ZENERGY_START; i < ScoreStruct::SCORE_ZENERGY_END; i++) {
00264 if ( isnan( scores->values[i] ) ) {
00265 scores->values[i] = 0.0;
00266 }
00267 }
00268 return zscore;
00269 }
00270
00271
00272
00273 pValType ScoreStruct::ScoreZ(ProspectParam *param,
00274 WeightArray weight,
00275 TemplateStruct *template_data,
00276 TargetStruct *target_data,
00277 AlignmentStruct *original_align) {
00278 long score_mask = (e_mask( e_mutationlog ) | e_mask( e_singleton ) | e_mask( e_sec_struct ) | e_mask( e_twobody ) | e_mask( e_dfire ) );
00279 EnergyArray *energy_array = (EnergyArray *)malloc(sizeof(EnergyArray) * param->z_cycles);
00280 z_loop_calc_data *zloop_data = (z_loop_calc_data *)malloc(sizeof(z_loop_calc_data) * param->ncpus );
00281
00282 for (int i = 0; i < param->ncpus; i++) {
00283 for (int j= 0; j < e_count; j++)
00284 zloop_data[i].weight[j] = weight[j];
00285 zloop_data[i].param = param;
00286 zloop_data[i].template_data = template_data;
00287 zloop_data[i].target_data = target_data;
00288 zloop_data[i].energy_array = energy_array;
00289 zloop_data[i].step_size = param->ncpus;
00290 zloop_data[i].original_align = original_align;
00291 zloop_data[i].step_start = i;
00292 zloop_data[i].full = false;
00293 zloop_data[i].score_mask = score_mask;
00294 }
00295 if ( param->ncpus > 1 ) {
00296 pthread_t *threads;
00297 threads = (pthread_t *)malloc(sizeof(pthread_t) * param->ncpus);
00298 for (int i = 0; i < param->ncpus; i++) {
00299 pthread_create(&threads[i], NULL, z_loop_calc_wrapper, &zloop_data[i]);
00300 }
00301 for (int i = 0; i < param->ncpus; i++) {
00302 pthread_join(threads[i], NULL);
00303 }
00304 free(threads);
00305 free(zloop_data);
00306 } else {
00307 z_loop_calc_wrapper( zloop_data );
00308 }
00309 pValType zscore = energy_array_stat( param, weight,
00310 template_data, target_data,
00311 original_align,
00312 energy_array, this,
00313 false, score_mask );
00314 free( energy_array );
00315 this->values[ SCORE_Z ] = zscore;
00316 this->mask |= (0x01 << SCORE_Z) ;
00317
00318 return zscore;
00319 }
00320
00321
00322 pValType ScoreStruct::ScoreZFull(ProspectParam *param,
00323 WeightArray weight,
00324 TemplateStruct *template_data,
00325 TargetStruct *target_data,
00326 AlignmentStruct *original_align) {
00327 EnergyArray *energy_array = (EnergyArray *)malloc(sizeof(EnergyArray) * param->z_cycles);
00328 z_loop_calc_data *zloop_data = (z_loop_calc_data *)malloc(sizeof(z_loop_calc_data) * param->ncpus );
00329 for (int i = 0; i < param->ncpus; i++) {
00330 for (int j= 0; j < e_count; j++)
00331 zloop_data[i].weight[j] = weight[j];
00332 zloop_data[i].param = param;
00333 zloop_data[i].template_data = template_data;
00334 zloop_data[i].target_data = target_data;
00335 zloop_data[i].energy_array = energy_array;
00336 zloop_data[i].step_size = param->ncpus;
00337 zloop_data[i].original_align = original_align;
00338 zloop_data[i].step_start = i;
00339 zloop_data[i].full = true;
00340 zloop_data[i].score_mask = 0xFFFFFFFF;
00341 }
00342
00343 if ( param->ncpus > 1 ) {
00344 pthread_t *threads;
00345 threads = (pthread_t *)malloc(sizeof(pthread_t) * param->ncpus);
00346 for (int i = 0; i < param->ncpus; i++) {
00347 pthread_create(&threads[i], NULL, z_loop_calc_wrapper, &zloop_data[i]);
00348 }
00349 for (int i = 0; i < param->ncpus; i++) {
00350 pthread_join(threads[i], NULL);
00351 }
00352 free(threads);
00353 free(zloop_data);
00354 } else {
00355 z_loop_calc_wrapper( zloop_data );
00356 }
00357 pValType zscore = energy_array_stat( param, weight,
00358 template_data, target_data,
00359 original_align,
00360 energy_array, this,
00361 true, 0xFFFFFFFF );
00362 free( energy_array );
00363 this->values[ SCORE_ZFULL ] = zscore;
00364 this->mask |= (0x01 << SCORE_ZFULL) ;
00365
00366 return zscore;
00367 }
00368
00369
00370