Changeset 1044 for cpp/frams/_demos/simil_test.cpp
- Timestamp:
- 12/10/20 22:33:22 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
cpp/frams/_demos/simil_test.cpp
r1005 r1044 5 5 6 6 #include <vector> 7 #include <string> 7 8 #include "common/loggers/loggertostdout.h" 8 9 #include "frams/_demos/genotypeloader.h" 9 10 #include "frams/genetics/preconfigured.h" 10 11 #include "common/virtfile/stdiofile.h" 11 #include "frams/model/similarity/simil_model.h" 12 13 12 #include "frams/model/similarity/measure-distribution.h" 13 #include "frams/model/similarity/measure-greedy.h" 14 #include "frams/model/similarity/measure-hungarian.h" 15 16 using namespace std; 17 18 int add_double_param(std::vector<string> *args, int pos, std::vector<double> *params, std::vector<string> *params_names) 19 { 20 for (unsigned int i = 0; i < params_names->size(); i++) 21 { 22 try 23 { 24 params->push_back(std::stod(args->at(pos))); 25 pos++; 26 } 27 catch (const std::invalid_argument&) 28 { 29 printf("%s should be a number\n", params_names->at(i).c_str()); 30 return -1; 31 } 32 catch (const std::out_of_range&) 33 { 34 printf("%s should be inside double range\n", params_names->at(i).c_str()); 35 return -1; 36 } 37 } 38 return 0; 39 } 14 40 15 41 /** Computes a matrix of distances between all genotypes in the specified … … 18 44 int main(int argc, char *argv[]) 19 45 { 20 LoggerToStdout messages_to_stdout(LoggerBase::Enable); 21 int iCurrParam = 0; // index of the currently processed parameter 22 char *szCurrParam = NULL; 23 ModelSimil M; // similarity computing object 24 bool bPrintNames = false; // specifies if names of genotypes are to be printed 25 int nResult = 0; // a temporary result 26 27 if (argc < 8) 28 { 29 printf("Too few parameters!\n"); 30 printf("Command line: [-names] <genotypesFile> <measure> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <fixZaxis?>\n\n"); 31 32 printf("Parameters:\n"); 33 printf(" <genotypesFile> name of a file with genotypes\n"); 34 printf(" <measure> similarity measure\n"); 35 printf(" <w_dP> weight of the difference in the number of parts\n"); 36 printf(" <w_dDEG> weight of the difference in degrees of matched parts\n"); 37 
printf(" <w_dNEU> weight of the difference in neurons of matched parts\n"); 38 printf(" <w_dGEO> weight of the distance of matched parts\n"); 39 printf(" <fixZaxis?> should the 'z' (vertical) coordinate be fixed during the alignment? (0 or 1)\n\n"); 40 41 printf("Switches:\n"); 42 printf(" -names specifies that the number and names of genotypes are to be printed to output\n"); 43 printf(" before the distance matrix; by default the number and names are not printed\n\n"); 44 45 printf("Outputs a symmetric distance matrix in the format:\n"); 46 printf(" <row_1> (columns in a row are separated by TABs)\n"); 47 printf(" ...\n"); 48 printf(" <row_n>\n"); 49 50 return -1; 51 } 52 53 // prepare output parameters from .gen file 54 vector<Geno *> pvGenos; 55 vector<char *> pvNames; 56 57 // check if there is a switch 58 iCurrParam = 1; 59 szCurrParam = argv[iCurrParam]; 60 if (strcmp(szCurrParam, "-names") == 0) 61 { 62 // switch "-names" was given; print names also 63 bPrintNames = true; 64 // pass to the next parameter 65 iCurrParam++; 66 } 67 68 // check the parameters 69 // get <genotypesFile> name from command line 70 char *szFileName = argv[iCurrParam]; 71 72 // initially set measure components' weights to invalid values (negative) 73 for (int i = 0; i < M.GetNOFactors(); i++) 74 { 75 M.m_adFactors[i] = -1.0; 76 } 77 78 iCurrParam++; 79 szCurrParam = argv[iCurrParam]; 80 int measure_type = -1; 81 nResult = sscanf(szCurrParam, "%d", &measure_type); 82 if (nResult != 1) 83 { 84 printf("Measure type should be a number!\n"); 85 return -1; 86 } 87 88 if (measure_type != 0 && measure_type != 1) 89 { 90 printf("Measure type should be 0 (flexible criteria order and optimal matching) or 1 (vertex degree order and greedy matching)!\n"); 91 return -1; 92 } 93 94 M.matching_method = measure_type; 95 96 const char *params[] = { "<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>" }; 97 for (int i = 0; i < M.GetNOFactors(); i++) 98 { 99 iCurrParam++; 100 szCurrParam = argv[iCurrParam]; 101 
nResult = sscanf(szCurrParam, "%lf", &M.m_adFactors[i]); 102 if (nResult != 1) 103 { 104 // <w_dX> is not a number -- error 105 printf("%s", params[i]); 106 printf(" should be a number\n"); 107 return -1; 108 } 109 else 110 { 111 // <w_dX> is a number; check if nonnegative 112 if (M.m_adFactors[i] < 0.0) 113 { 114 printf("%s", params[i]); 115 printf(" should be a nonnegative number\n"); 116 return -1; 117 } 118 } 119 } 120 121 iCurrParam++; 122 szCurrParam = argv[iCurrParam]; 123 nResult = sscanf(szCurrParam, "%d", &M.fixedZaxis); 124 if (nResult != 1) 125 { 126 // <isZFixed> is not a number -- error 127 printf("<isZFixed> should be a number\n"); 128 return -1; 129 } 130 else if (M.fixedZaxis != 0 && M.fixedZaxis != 1) 131 { 132 printf("<isZFixed>=%d. <isZFixed> should be equal to 0 or 1\n", M.fixedZaxis); 133 return -1; 134 } 135 136 // read the input file 137 // prepare loading of genotypes from a .gen file 138 // create some basic genotype converters 139 PreconfiguredGenetics genetics; 140 StdioFileSystem_autoselect stdiofilesys; 141 142 long count = 0, totalsize = 0; 143 GenotypeMiniLoader loader(szFileName); 144 GenotypeMini *loaded; 145 while (loaded = loader.loadNextGenotype()) 146 { 147 // while a valid genotype was loaded 148 count++; 149 totalsize += loaded->genotype.length(); 150 // create a Geno object based on the MiniGenotype 151 Geno *pNextGenotype = new Geno(loaded->genotype); 152 if ((pNextGenotype != NULL) && (pNextGenotype->isValid())) 153 { 154 pvGenos.push_back(pNextGenotype); 155 char *szNewName = new char[loaded->name.length() + 1]; 156 strcpy(szNewName, loaded->name.c_str()); 157 pvNames.push_back(szNewName); 158 } 159 else 160 { 161 printf("Genotype %2li is not valid\n", count); 162 if (pNextGenotype != NULL) delete pNextGenotype; 163 } 164 } 165 if (loader.getStatus() == GenotypeMiniLoader::OnError) 166 { 167 printf("Error: %s", loader.getError().c_str()); 168 } 169 170 double dSimilarity = 0.0; 171 double **aaSimil = NULL; // array of 
similarities 172 173 // create an empty array of similarities 174 aaSimil = new double*[pvGenos.size()]; 175 for (unsigned int k = 0; k < pvGenos.size(); k++) 176 { 177 aaSimil[k] = new double[pvGenos.size()]; 178 for (unsigned int l = 0; l < pvGenos.size(); l++) 179 aaSimil[k][l] = 0.0; 180 } 181 182 // compute and store similarities 183 for (unsigned int i = 0; i < pvGenos.size(); i++) 184 { 185 for (unsigned int j = 0; j < pvGenos.size(); j++) 186 { 187 dSimilarity = M.EvaluateDistance(pvGenos.operator[](i), pvGenos.operator[](j)); 188 aaSimil[i][j] = dSimilarity; 189 } 190 } 191 192 if (bPrintNames) 193 { 194 // if the "-names" switch was given, print the number of genotypes and their names 195 printf("%li\n", pvGenos.size()); 196 for (unsigned int iGen = 0; iGen < pvNames.size(); iGen++) 197 { 198 printf("%s\n", pvNames.at(iGen)); 199 } 200 } 201 202 // print out the matrix of similarities 203 for (unsigned int i = 0; i < pvGenos.size(); i++) 204 { 205 for (unsigned int j = 0; j < pvGenos.size(); j++) 206 { 207 printf("%.2lf\t", aaSimil[i][j]); 208 } 209 printf("\n"); 210 } 211 212 // delete vectors and arrays 213 for (unsigned int i = 0; i < pvGenos.size(); i++) 214 { 215 delete pvGenos.operator[](i); 216 delete[] pvNames.operator[](i); 217 delete[] aaSimil[i]; 218 } 219 220 delete[] aaSimil; 221 222 return 0; 46 typedef double *pDouble; 47 LoggerToStdout messages_to_stdout(LoggerBase::Enable); 48 SimilMeasure *simil_measure = nullptr; 49 if (argc < 5) 50 { 51 printf("Too few parameters!\n"); 52 printf("Command line: [-names] <genotypesFile> <measure (greedy/hungarian)> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <fixZaxis?>\n\n"); 53 printf("Command line: [-names] <genotypesFile> <measure (distribution)> <desc> <simil> <dens> <bins> <samp_num>\n\n"); 54 printf("Parameters:\n"); 55 printf(" <genotypesFile> name of a file with genotypes\n"); 56 printf(" <measure> similarity measure name (greedy/hungarian/distribution)\n"); 57 printf("\n"); 58 printf("Parameters of 
greedy and hungarian measures:\n"); 59 printf(" <w_dP> weight of the difference in the number of parts\n"); 60 printf(" <w_dDEG> weight of the difference in degrees of matched parts\n"); 61 printf(" <w_dNEU> weight of the difference in neurons of matched parts\n"); 62 printf(" <w_dGEO> weight of the distance of matched parts\n"); 63 printf(" <fixZaxis?> should the 'z' (vertical) coordinate be fixed during the alignment? (0 or 1)\n\n"); 64 printf("Parameters of distribution measure:\n"); 65 printf(" <dens> sampling density\n"); 66 printf(" <bins> number of histogram bins\n"); 67 printf(" <samp_num> number of samples taken\n\n"); 68 69 printf("Switches:\n"); 70 printf(" -names specifies that the number and names of genotypes are to be printed to output\n"); 71 printf(" before the distance matrix; by default the number and names are not printed\n\n"); 72 73 printf("Outputs a symmetric distance matrix in the format:\n"); 74 printf(" <row_1> (columns in a row are separated by TABs)\n"); 75 printf(" ...\n"); 76 printf(" <row_n>\n"); 77 78 return -1; 79 } 80 81 std::vector<string> args; 82 for (int i = 1; i < argc; i++) 83 args.push_back(std::string(argv[i])); 84 85 bool print_names = false; 86 87 int pos = 1; 88 if (args.at(0).compare("-names")==0) 89 { 90 print_names = true; 91 pos = 2; 92 } 93 94 string measure_name = args.at(pos); 95 pos++; 96 std::vector<double> params; 97 98 if (measure_name.compare("greedy")==0 || measure_name.compare("hungarian")==0) 99 { 100 std::vector<string> params_names{ "<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>", "<fixZaxis?>" }; 101 102 if (add_double_param(&args, pos, &params, &params_names) == -1) 103 return -1; 104 105 if (measure_name.compare("greedy")==0) 106 simil_measure = new SimilMeasureGreedy(); 107 else 108 simil_measure = new SimilMeasureHungarian(); 109 } 110 111 112 else if (measure_name.compare("distribution")==0) 113 { 114 std::vector<string> params_names{ "<dens>", "<bins>", "<samp_num>" }; 115 116 if (add_double_param(&args, 
pos, &params, &params_names)==-1) 117 return -1; 118 119 simil_measure = new SimilMeasureDistribution(); 120 } 121 122 else 123 { 124 printf("Measure type should be greedy (flexible criteria order and optimal matching), hungarian (vertex degree order and greedy matching) or distribution!\n"); 125 return -1; 126 } 127 128 simil_measure->setParams(params); 129 130 // read the input file 131 // prepare loading of genotypes from a .gen file 132 // create some basic genotype converters 133 PreconfiguredGenetics genetics; 134 StdioFileSystem_autoselect stdiofilesys; 135 136 // prepare output parameters from .gen file 137 vector<Geno *> pvGenos; 138 vector<char *> pvNames; 139 140 long count = 0, totalsize = 0; 141 GenotypeMiniLoader loader(args.at(0).c_str()); 142 GenotypeMini *loaded; 143 while (loaded = loader.loadNextGenotype()) 144 { 145 // while a valid genotype was loaded 146 count++; 147 totalsize += loaded->genotype.length(); 148 // create a Geno object based on the MiniGenotype 149 Geno *pNextGenotype = new Geno(loaded->genotype); 150 if ((pNextGenotype != NULL) && (pNextGenotype->isValid())) 151 { 152 pvGenos.push_back(pNextGenotype); 153 char *szNewName = new char[loaded->name.length() + 1]; 154 strcpy(szNewName, loaded->name.c_str()); 155 pvNames.push_back(szNewName); 156 } 157 else 158 { 159 printf("Genotype %2li is not valid\n", count); 160 if (pNextGenotype != NULL) delete pNextGenotype; 161 } 162 } 163 if (loader.getStatus() == GenotypeMiniLoader::OnError) 164 { 165 printf("Error: %s", loader.getError().c_str()); 166 } 167 168 double dSimilarity = 0.0; 169 double **aaSimil = NULL; // array of similarities 170 171 // create the empty array of similarities 172 aaSimil = new pDouble[pvGenos.size()]; 173 for (unsigned int k = 0; k < pvGenos.size(); k++) 174 { 175 aaSimil[k] = new double[pvGenos.size()]; 176 for (unsigned int l = 0; l < pvGenos.size(); l++) 177 aaSimil[k][l] = 0.0; 178 } 179 180 181 182 // compute and remember similarities 183 for (unsigned int i = 0; 
i < pvGenos.size(); i++) 184 { 185 for (unsigned int j = 0; j < pvGenos.size(); j++) 186 { 187 dSimilarity = simil_measure->evaluateDistance(pvGenos.operator[](i), pvGenos.operator[](j)); 188 aaSimil[i][j] = dSimilarity; 189 } 190 } 191 192 if (print_names) 193 { 194 // if "-names" switch was given, print the number of genotypes and their names 195 printf("%li\n", pvGenos.size()); 196 for (unsigned int iGen = 0; iGen < pvNames.size(); iGen++) 197 { 198 printf("%s\n", pvNames.at(iGen)); 199 } 200 } 201 202 // print out the matrix of similarities 203 for (unsigned int i = 0; i < pvGenos.size(); i++) 204 { 205 for (unsigned int j = 0; j < pvGenos.size(); j++) 206 { 207 printf("%.2lf\t", aaSimil[i][j]); 208 } 209 printf("\n"); 210 } 211 212 // delete vectors and arrays 213 for (unsigned int i = 0; i < pvGenos.size(); i++) 214 { 215 delete pvGenos.operator[](i); 216 delete[] pvNames.operator[](i); 217 delete[] aaSimil[i]; 218 } 219 220 delete[] aaSimil; 221 delete simil_measure; 222 223 return 0; 223 224 }
Note: See TracChangeset
for help on using the changeset viewer.