# coding:utf-8
import logging

import numpy as np
from scipy.linalg import svd

from mla.base import BaseEstimator

np.random.seed(1000)


class PCA(BaseEstimator):
    y_required = False

    def __init__(self, n_components, solver="svd"):
"""Principal component analysis (PCA) implementation.
Transforms a dataset of possibly correlated values into n linearly
uncorrelated components. The components are ordered such that the first
has the largest possible variance and each following component as the
largest possible variance given the previous components. This causes
the early components to contain most of the variability in the dataset.
Parameters
----------
n_components : int
solver : str, default 'svd'
{'svd', 'eigen'}
"""
        self.solver = solver
        self.n_components = n_components
        self.components = None
        self.mean = None

    def fit(self, X, y=None):
        self.mean = np.mean(X, axis=0)
        self._decompose(X)
    def _decompose(self, X):
        # Mean centering
        X = X.copy()
        X -= self.mean

        if self.solver == "svd":
            # Singular values come back sorted in descending order; their squares
            # are proportional to the variance explained by each component.
            _, s, Vh = svd(X, full_matrices=True)
            variance_ratio = s ** 2 / np.sum(s ** 2)
        elif self.solver == "eigen":
            # Eigenvalues of the covariance matrix are the component variances,
            # but np.linalg.eig returns them in no particular order, so sort the
            # eigenpairs by decreasing eigenvalue.
            s, Vh = np.linalg.eig(np.cov(X.T))
            Vh = Vh.T
            order = np.argsort(s)[::-1]
            s, Vh = s[order], Vh[order]
            variance_ratio = s / s.sum()
        else:
            raise ValueError("Unknown solver: %s" % self.solver)

        logging.info("Explained variance ratio: %s" % (variance_ratio[: self.n_components]))

        self.components = Vh[: self.n_components]
    def transform(self, X):
        X = X.copy()
        X -= self.mean
        return np.dot(X, self.components.T)

    def _predict(self, X=None):
        return self.transform(X)
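

# Minimal usage sketch (illustrative only, not part of the library API): fits the
# estimator on synthetic correlated data and projects it onto a single component.
# It assumes mla.base.BaseEstimator is importable and relies only on the
# fit/transform methods defined above; the data and parameter choices are arbitrary.
if __name__ == "__main__":
    # Two correlated features: the second is a noisy copy of the first.
    X = np.random.randn(200, 2)
    X[:, 1] = 0.9 * X[:, 0] + 0.1 * np.random.randn(200)

    for solver in ("svd", "eigen"):
        pca = PCA(n_components=1, solver=solver)
        pca.fit(X)
        X_reduced = pca.transform(X)
        # Both solvers should reduce the data to shape (200, 1), with components
        # that agree up to a sign flip.
        print(solver, X_reduced.shape, pca.components)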