# coding:utf-8
import logging

import numpy as np

from mla.base import BaseEstimator
from mla.metrics.distance import l2_distance

np.random.seed(999)

"""
References:
https://lvdmaaten.github.io/tsne/
Based on:
https://lvdmaaten.github.io/tsne/code/tsne_python.zip
"""
class TSNE(BaseEstimator):
    y_required = False

    def __init__(self, n_components=2, perplexity=30.0, max_iter=200, learning_rate=500):
        """A t-Distributed Stochastic Neighbor Embedding implementation.

        Parameters
        ----------
        n_components : int, default 2
        perplexity : float, default 30.0
        max_iter : int, default 200
        learning_rate : float, default 500
        """
        self.max_iter = max_iter
        self.perplexity = perplexity
        self.n_components = n_components
        self.initial_momentum = 0.5
        self.final_momentum = 0.8
        self.min_gain = 0.01
        self.lr = learning_rate
        self.tol = 1e-5
        self.perplexity_tries = 50
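
    # Gradient of the KL objective with respect to each embedded point,
    # as computed in fit_transform below:
    #   dC/dy_i = 4 * sum_j (p_ij - q_ij) / (1 + ||y_i - y_j||^2) * (y_i - y_j)
    # The first iterations scale P by 4 ("early exaggeration") and the update
    # uses momentum plus per-coordinate adaptive gains.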
    def fit_transform(self, X, y=None):
        self._setup_input(X, y)

        Y = np.random.randn(self.n_samples, self.n_components)
        velocity = np.zeros_like(Y)
        gains = np.ones_like(Y)

        P = self._get_pairwise_affinities(X)

        iter_num = 0
        while iter_num < self.max_iter:
            iter_num += 1

            D = l2_distance(Y)
            Q = self._q_distribution(D)

            # Normalize the q distribution
            Q_n = Q / np.sum(Q)

            # Early exaggeration & momentum
            pmul = 4.0 if iter_num < 100 else 1.0
            momentum = self.initial_momentum if iter_num < 20 else self.final_momentum

            # Perform gradient step
            grads = np.zeros(Y.shape)
            for i in range(self.n_samples):
                grad = 4 * np.dot((pmul * P[i] - Q_n[i]) * Q[i], Y[i] - Y)
                grads[i] = grad

            gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0))
            gains = gains.clip(min=self.min_gain)

            velocity = momentum * velocity - self.lr * (gains * grads)
            Y += velocity
            Y = Y - np.mean(Y, 0)

            # Error is the KL divergence between P and the normalized Q
            error = np.sum(P * np.log(P / Q_n))
            logging.info("Iteration %s, error %s" % (iter_num, error))
        return Y
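
    # High-dimensional affinities: for each point i, p_{j|i} is a Gaussian
    # over input distances whose bandwidth is found by binary search so that
    # the entropy of the row matches log(perplexity). Rows are then
    # symmetrized: p_ij = (p_{j|i} + p_{i|j}) / (2 * n_samples).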
    def _get_pairwise_affinities(self, X):
        """Computes pairwise affinities."""
        affines = np.zeros((self.n_samples, self.n_samples), dtype=np.float32)
        target_entropy = np.log(self.perplexity)

        distances = l2_distance(X)

        for i in range(self.n_samples):
            affines[i, :] = self._binary_search(distances[i], target_entropy)

        # Fill diagonal with near zero value
        np.fill_diagonal(affines, 1.0e-12)

        affines = affines.clip(min=1e-100)
        affines = (affines + affines.T) / (2 * self.n_samples)
        return affines
    def _binary_search(self, dist, target_entropy):
        """Performs binary search to find suitable precision."""
        precision_min = 0
        precision_max = 1.0e15
        precision = 1.0e5

        for _ in range(self.perplexity_tries):
            denom = np.sum(np.exp(-dist[dist > 0.0] / precision))
            beta = np.exp(-dist / precision) / denom

            # Exclude zeros
            g_beta = beta[beta > 0.0]
            entropy = -np.sum(g_beta * np.log2(g_beta))

            error = entropy - target_entropy

            if error > 0:
                # Decrease precision
                precision_max = precision
                precision = (precision + precision_min) / 2.0
            else:
                # Increase precision
                precision_min = precision
                precision = (precision + precision_max) / 2.0

            if np.abs(error) < self.tol:
                break

        return beta
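
    # Low-dimensional affinities: a Student t-kernel with one degree of
    # freedom, q_ij proportional to 1 / (1 + ||y_i - y_j||^2), returned
    # unnormalized here and normalized by its total sum in fit_transform.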
    def _q_distribution(self, D):
        """Computes Student t-distribution."""
        Q = 1.0 / (1.0 + D)
        np.fill_diagonal(Q, 0.0)
        Q = Q.clip(min=1e-100)
        return Q
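

# Minimal usage sketch, not from the reference implementation: it assumes the
# mla package is importable and that BaseEstimator._setup_input accepts a
# plain NumPy array. The two-blob data below is purely illustrative.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Two well-separated Gaussian blobs in 10 dimensions
    X = np.vstack([
        np.random.randn(50, 10) + 5.0,
        np.random.randn(50, 10) - 5.0,
    ])

    tsne = TSNE(n_components=2, perplexity=20.0, max_iter=200)
    embedding = tsne.fit_transform(X)
    print(embedding.shape)  # (100, 2)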