- Notifications
You must be signed in to change notification settings - Fork 228
/
Copy pathtest_triplets_classifiers.py
127 lines (112 loc) · 4.96 KB
/
test_triplets_classifiers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
importpytest
fromsklearn.exceptionsimportNotFittedError
fromsklearn.model_selectionimporttrain_test_split
frommetric_learnimportSCML
fromtest.test_utilsimport (
triplets_learners,
ids_triplets_learners,
build_triplets
)
frommetric_learn.sklearn_shimsimportset_random_state
fromsklearnimportclone
importnumpyasnp
fromnumpy.testingimportassert_array_equal
@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
                         ids=ids_triplets_learners)
def test_predict_only_one_or_minus_one(estimator, build_dataset,
                                       with_preprocessor):
    """Check that every value returned by ``predict`` is either +1 or -1.

    A clone of the learner is fitted on one split of the triplets and then
    asked to predict on the other split.
    """
    input_data, _labels, preprocessor, _points = build_dataset(
        with_preprocessor)

    model = clone(estimator)
    model.set_params(preprocessor=preprocessor)
    set_random_state(model)

    train_triplets, test_triplets = train_test_split(input_data)
    model.fit(train_triplets)

    # Any prediction outside {-1, +1} makes the test fail.
    allowed = (-1, 1)
    assert all(value in allowed for value in model.predict(test_triplets))
@pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
                         ids=ids_triplets_learners)
def test_no_zero_prediction(estimator, build_dataset):
    """Check that ``predict`` never returns zero, even on border cases.

    Border cases are triplets of the form (x, y, z) for which the distances
    d(x, y) and d(x, z) are the same.
    """
    triplets, _, _, X = build_dataset(with_preprocessor=False)

    # Keep only 3 dimensions so that the cross product can be used to get an
    # orthogonal vector easily.
    triplets = np.array([[t[i][:3] for i in range(3)] for t in triplets])
    X = X[:, :3]

    # Dummy fit, then force the transformation to be the identity so that
    # the metric is the euclidean distance.
    model = clone(estimator)
    set_random_state(model)
    model.fit(triplets)
    model.components_ = np.eye(X.shape[1])

    # Build two unit vectors orthogonal to X[1] (and to each other).
    anchor = X[1]
    unit_anchor = anchor / np.linalg.norm(anchor)
    first_orth = X[2] - X[2].dot(unit_anchor) * unit_anchor
    first_orth /= np.linalg.norm(first_orth)
    second_orth = np.cross(unit_anchor, first_orth)

    # Both orthogonal vectors must differ from the anchor itself...
    for candidate in (first_orth, second_orth):
        with pytest.raises(AssertionError):
            assert_array_equal(anchor, candidate)

    # ...while being at the same distance from it.
    metric = model.get_metric()
    assert metric(anchor, first_orth) == metric(anchor, second_orth)

    # The three scenarios where a plain numpy.sign would give 0.
    critical_triplets = np.array([
        [X[0], X[2], X[2]],
        [anchor, anchor, anchor],
        [anchor, first_orth, second_orth],
    ])

    predictions = model.predict(critical_triplets)
    assert not np.any(predictions == 0)
@pytest.mark.parametrize('with_preprocessor', [True, False])
@pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
                         ids=ids_triplets_learners)
def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset,
                                              with_preprocessor):
    """Check that predicting with an unfitted learner raises NotFittedError."""
    input_data, _labels, preprocessor, _points = build_dataset(
        with_preprocessor)

    # Configure the clone but deliberately never call ``fit`` on it.
    unfitted = clone(estimator)
    unfitted.set_params(preprocessor=preprocessor)
    set_random_state(unfitted)

    with pytest.raises(NotFittedError):
        unfitted.predict(input_data)
@pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
                         ids=ids_triplets_learners)
def test_accuracy_toy_example(estimator, build_dataset):
    """Check the default triplet scoring (accuracy) on a small toy example."""
    triplets, _, _, X = build_dataset(with_preprocessor=False)

    model = clone(estimator)
    set_random_state(model)
    model.fit(triplets)
    # Force the transformation to be the identity so that we control what
    # the learner does.
    model.components_ = np.eye(X.shape[1])

    # Build 4 regularly spaced points on the line defined by the first two
    # points, which makes it easy to write triplets of known similarities.
    offsets = np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4
    X_test = X[0] + offsets

    # Each row lists the indices (into X_test) of one test triplet.
    index_triplets = [(0, 2, 1),
                      (1, 3, 0),
                      (1, 2, 3),
                      (3, 0, 2)]
    triplets_test = np.array(
        [[X_test[i] for i in indices] for indices in index_triplets])

    assert model.score(triplets_test) == 0.25
def test_raise_big_number_of_features():
    """Check the error SCML raises when fitted on only 3 triplets.

    The fit is expected to fail with a ValueError whose message reports
    more features than triplets.
    """
    triplets, _, _, _ = build_triplets(with_preprocessor=False)
    # Keep only the first three triplets.
    triplets = triplets[:3, :, :]

    learner = SCML(n_basis=320)
    set_random_state(learner)

    expected_message = (
        "Number of features (4) is greater than the number of triplets(3)."
        "\nConsider using dimensionality reduction or using another basis "
        "generation scheme."
    )
    with pytest.raises(ValueError) as exc_info:
        learner.fit(triplets)
    assert exc_info.value.args[0] == expected_message