Changeset 598
 Timestamp:
 08/25/16 17:44:00 (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

mdsandtrees/mds_plot.py
r597 r598 16 16 17 17 18 18 #http://www.nervouscomputer.com/hfs/cmdscaleinpython/ 19 def cmdscale(D): 20 """ 21 Classical multidimensional scaling (MDS) 22 23 Parameters 24  25 D : (n, n) array 26 Symmetric distance matrix. 27 28 Returns 29  30 Y : (n, p) array 31 Configuration matrix. Each column represents a dimension. Only the 32 p dimensions corresponding to positive eigenvalues of B are returned. 33 Note that each dimension is only determined up to an overall sign, 34 corresponding to a reflection. 35 36 e : (n,) array 37 Eigenvalues of B. 38 39 """ 40 # Number of points 41 n = len(D) 42 43 # Centering matrix 44 H = np.eye(n)  np.ones((n, n))/n 45 46 # YY^T 47 B = H.dot(D**2).dot(H)/2 48 49 # Diagonalize 50 evals, evecs = np.linalg.eigh(B) 51 52 # Sort by eigenvalue in descending order 53 idx = np.argsort(evals)[::1] 54 evals = evals[idx] 55 evecs = evecs[:,idx] 56 57 # Compute the coordinates using positiveeigenvalued components only 58 w, = np.where(evals > 0) 59 L = np.diag(np.sqrt(evals[w])) 60 V = evecs[:,w] 61 Y = V.dot(L) 62 63 return Y, evals 19 64 20 65 def rand_jitter(arr): … … 31 76 32 77 def compute_mds(distance_matrix, dim): 33 seed = np.random.RandomState(seed=3) 34 mds = manifold.MDS(n_components=int(dim), metric=True, max_iter=3000, eps=1e9, random_state=seed, dissimilarity="precomputed") 35 embed = mds.fit(distance_matrix).embedding_ 78 embed, evals = cmdscale(distance_matrix) 79 variances = compute_variances(embed) 80 embed = np.asarray([embed[:,i] for i in range(dim)]).T 81 82 percent_variances = [sum(variances[:i+1])/sum(variances) for i in range(dim)] 83 for i,pv in enumerate(percent_variances): 84 print(i+1,"dimension:",pv) 85 36 86 return embed 37 87 … … 41 91 for i in range(len(embed[0])): 42 92 variances.append(np.var(embed[:,i])) 43 percent_variances = [sum(variances[:i+1])/sum(variances) for i in range(len(variances))] 44 return percent_variances 93 return variances 45 94 46 95 … … 69 118 else: 70 119 plt.savefig(outname+".pdf") 120 np.savetxt(outname+".csv", coordinates, delimiter=";") 71 121 72 122 73 123 def main(filename, dimensions=3, outname="", jitter=0, separator='\t'): 124 dimensions = int(dimensions) 74 125 distances = read_file(filename, separator) 75 126 embed = compute_mds(distances, dimensions) 76 127 77 variances_perc = compute_variances(embed)78 for i,vc in enumerate(variances_perc):79 print(i+1,"dimension:",vc)80 81 dimensions = int(dimensions)82 128 if dimensions == 1: 83 129 embed = np.array([np.insert(e, 0, 0, axis=0) for e in embed])
Note: See TracChangeset
for help on using the changeset viewer.