source: cpp/frams/_demos/simil_test.cpp @ 382

Last change on this file since 382 was 382, checked in by sz, 9 years ago

Moving frams/virtfile to common/virtfile:

  • file references updated (includes, makefile)
  • common/virtfile can no longer use the Framsticks-specific SString (it uses std::string instead)
File size: 6.8 KB
Line 
1// This file is a part of Framsticks SDK.  http://www.framsticks.com/
2// Copyright (C) 1999-2015  Maciej Komosinski and Szymon Ulatowski.
3// See LICENSE.txt for details.
4
5
6#include <vector>
7#include "frams/loggers/loggertostdout.h"
8#include "frams/_demos/genotypeloader.h"
9#include "frams/genetics/preconfigured.h"
10#include "common/virtfile/stdiofile.h"
11#include "frams/model/similarity/simil_model.h"
12
13
14
15using namespace std;
16
17/** Computes a matrix of distances between all genotypes in the specified
18    .gen file, using the matching and measure weights as specified in the
19    command line.
20
21    Command line parameters: [-names] <genotypesFile> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO>
22
23    Parameters:
24      <genotypesFile> name of a file with genotypes
25      <w_dP> weight of the difference in the number of parts
26      <w_dDEG> weight of the difference in degrees of matched parts
27      <w_dNEU> weight of the difference in neurons of matched parts
28      <w_dGEO> weight of the distance between matched parts
29
30    Switches:
31      -names specifies that the number and names of genotypes are to be printed to output
32        before the distance matrix; by default the number and names are not printed
33
34    Outputs a distance matrix in the format:
35    <row_1> (columns in a row are separated by TABs)
36    ...
37    <row_n>
38 */
39int main(int argc, char *argv[])
40{
41    LoggerToStdout messages_to_stdout(LoggerBase::Enable);
42    typedef double *pDouble;
43    int iCurrParam = 0; // index of the currently processed parameter
44    char *szCurrParam = NULL;
45    ModelSimil M; // similarity computing object
46    bool bPrintNames = false; // specifies if names of genotypes are to be printed
47    int nResult = 0; // a temporary result
48
49    if (argc < 6)
50    {
51        // too few parameters
52        printf("Too few parameters!\n");
53        printf("Command line: [-names] <genotypesFile> <matchType> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <ifFUZZY>\n\n");
54        printf("Parameters:\n");
55        printf(" <genotypesFile> name of a file with genotypes (only f1 format is allowed)\n");
56        printf(" <w_dP> weight of the difference in the number of parts\n");
57        printf(" <w_dDEG> weight of the difference in degrees of matched parts\n");
58        printf(" <w_dNEU> weight of the difference in neurons of matched parts\n");
59        printf(" <w_dGEO> weight of the distance of matched parts\n\n");
60        printf("Switches:\n");
61        printf(" -names specifies that the number and names of genotypes are to be printed to output\n");
62        printf("   before the distance matrix; by default the number and names are not printed\n\n");
63
64        printf("Outputs a symmetric distance matrix in the format:\n");
65        printf(" <row_1> (columns in a row are separated by TABs)\n");
66        printf(" ...\n");
67        printf(" <row_n>\n");
68
69        return -1;
70    }
71
72    // prepare output parameters from .gen file
73    vector<Geno *> *pvGenos = new vector<Geno *>();
74    vector<char *> *pvNames = new vector<char *>();
75
76    // check if there is a switch
77    iCurrParam = 1;
78    szCurrParam = argv[ iCurrParam ];
79    if (strcmp(szCurrParam, "-names") == 0)
80    {
81        // switch "-names" was given; print names also
82        bPrintNames = true;
83        // pass to the next parameter
84        iCurrParam++;
85    }
86
87    // check the parameters
88    // get <genotypesFile> name from command line
89    char *szFileName = argv[ iCurrParam ];
90
91    // initially set measure components' weights to invalid values (negative)
92    for (int i = 0; i < M.GetNOFactors(); i++)
93    {
94        M.m_adFactors[i] = -1.0;
95    }
96
97    const char *params[] = {"<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>"};
98    for (int i = 0; i < M.GetNOFactors(); i++)
99    {
100        iCurrParam++;
101        szCurrParam = argv[ iCurrParam ];
102        nResult = sscanf(szCurrParam, " %lf ", & M.m_adFactors[ i ]);
103        if (nResult != 1)
104        {
105            // <w_dP> is not a number -- error
106            printf("%s", params[i]);
107            printf(" should be a number\n");
108            return -1;
109        }
110        else
111        {
112            // <w_dP> is a number; check if nonnegative
113            if (M.m_adFactors[ i ] < 0.0)
114            {
115                printf("%s", params[i]);
116                printf(" should be a nonnegative number\n");
117                return -1;
118            }
119        }
120    }
121
122    // read the input file
123    // prepare loading of genotypes from a .gen file
124    // create some basic genotype converters
125    PreconfiguredGenetics genetics;
126    StdioFileSystem_autoselect stdiofilesys;
127
128    long count = 0, totalsize = 0;
129    MiniGenotypeLoader loader(szFileName);
130    MiniGenotype *loaded;
131    while (loaded = loader.loadNextGenotype())
132    {
133        // while a valid genotype was loaded
134        count++;
135        totalsize += loaded->genotype.len();
136        // create a Geno object based on the MiniGenotype
137        Geno *pNextGenotype = new Geno(loaded->genotype);
138        if ((pNextGenotype != NULL) && (pNextGenotype->isValid()))
139        {
140            pvGenos->push_back(pNextGenotype);
141            char *szNewName = new char [ loaded->name.len() + 1];
142            strcpy(szNewName, loaded->name.c_str());
143            pvNames->push_back(szNewName);
144        }
145        else
146        {
147            printf("Genotype %2li is not valid\n", count);
148        }
149    }
150    if (loader.getStatus() == MiniGenotypeLoader::OnError)
151    {
152        printf("Error: %s", loader.getError().c_str());
153    }
154
155    double dSimilarity = 0.0;
156    double **aaSimil = NULL; // array of similarities
157
158    // create the empty array of similarities
159    aaSimil = new pDouble [pvGenos->size()];
160    for (unsigned int k = 0; k < pvGenos->size(); k++)
161    {
162        aaSimil[k] = new double [pvGenos->size()];
163                for (unsigned int l = 0; l < pvGenos->size(); l++)
164            aaSimil[k][l] = 0.0;
165    }
166
167    // compute and remember similarities
168        for (unsigned int i = 0; i < pvGenos->size(); i++)
169    {
170                for (unsigned int j = 0; j < pvGenos->size(); j++)
171        {
172            dSimilarity = M.EvaluateDistance(pvGenos->operator[](i), pvGenos->operator[](j));
173            aaSimil[i][j] = dSimilarity;
174        }
175    }
176
177    if (bPrintNames)
178    {
179        // if "-names" switch was given,
180        // print the number of genotypes and their names
181        printf("%li\n", pvGenos->size());
182                for (unsigned int iGen = 0; iGen < pvNames->size(); iGen++)
183        {
184            printf("%s\n", pvNames->at(iGen));
185        }
186    }
187
188    // print out the matrix of similarities
189        for (unsigned int i = 0; i < pvGenos->size(); i++)
190    {
191                for (unsigned int j = 0; j < pvGenos->size(); j++)
192        {
193            printf("%.2lf\t", aaSimil[i][j]);
194        }
195        printf("\n");
196    }
197
198    // delete vectors and arrays
199        for (unsigned int i = 0; i < pvGenos->size(); i++)
200    {
201        delete pvGenos->operator[](i);
202        delete [] pvNames->operator[](i);
203        delete [] aaSimil[i];
204    }
205
206    delete pvGenos;
207    delete pvNames;
208    delete [] aaSimil;
209
210    return 0;
211}
Note: See TracBrowser for help on using the repository browser.