- Notifications
You must be signed in to change notification settings - Fork 54
/
Copy pathComputeAccuracy.html
326 lines (299 loc) · 30.8 KB
/
ComputeAccuracy.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
<!DOCTYPE html>
<htmlclass="writer-html5" lang="en" >
<head>
<metacharset="utf-8" /><metacontent="Topic: Measuring classification accuracy, Difficulty: Easy, Category: Practice Problem" name="description" />
<metacontent="numpy, vectorization, practice, machine learning, classifier" name="keywords" />
<metaname="viewport" content="width=device-width, initial-scale=1.0" />
<title>Measuring the Accuracy of a Classification Model — Python Like You Mean It</title>
<linkrel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<linkrel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<linkrel="stylesheet" href="../../_static/my_theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<scriptdata-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
<scriptsrc="../../_static/jquery.js"></script>
<scriptsrc="../../_static/underscore.js"></script>
<scriptsrc="../../_static/doctools.js"></script>
<scriptasync="async" src="https://www.googletagmanager.com/gtag/js?id=UA-115029372-1"></script>
<scriptsrc="../../_static/gtag.js"></script>
<scriptcrossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script>window.MathJax={"tex": {"inlineMath": [["$","$"],["\\(","\\)"]],"processEscapes": true},"options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document","processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
<scriptdefer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<scriptsrc="../../_static/js/theme.js"></script>
<linkrel="index" title="Index" href="../../genindex.html" />
<linkrel="search" title="Search" href="../../search.html" />
<linkrel="next" title="Playing Darts and Estimating Pi" href="Approximating_pi.html" />
<linkrel="prev" title="Module 3: Problems" href="../../module_3_problems.html" />
</head>
<bodyclass="wy-body-for-nav">
<divclass="wy-grid-for-nav">
<navdata-toggle="wy-nav-shift" class="wy-nav-side">
<divclass="wy-side-scroll">
<divclass="wy-side-nav-search" >
<ahref="../../index.html" class="icon icon-home"> Python Like You Mean It
</a>
<divclass="version">
1.4
</div>
<divrole="search">
<formid="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<inputtype="text" name="q" placeholder="Search docs" />
<inputtype="hidden" name="check_keywords" value="yes" />
<inputtype="hidden" name="area" value="default" />
</form>
</div>
</div><divclass="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<pclass="caption" role="heading"><spanclass="caption-text">Table of Contents:</span></p>
<ulclass="current">
<liclass="toctree-l1"><aclass="reference internal" href="../../intro.html">Python Like You Mean It</a></li>
<liclass="toctree-l1"><aclass="reference internal" href="../../module_1.html">Module 1: Getting Started with Python</a></li>
<liclass="toctree-l1"><aclass="reference internal" href="../../module_2.html">Module 2: The Essentials of Python</a></li>
<liclass="toctree-l1"><aclass="reference internal" href="../../module_2_problems.html">Module 2: Problems</a></li>
<liclass="toctree-l1"><aclass="reference internal" href="../../module_3.html">Module 3: The Essentials of NumPy</a></li>
<liclass="toctree-l1 current"><aclass="reference internal" href="../../module_3_problems.html">Module 3: Problems</a><ulclass="current">
<liclass="toctree-l2 current"><aclass="current reference internal" href="#">Measuring the Accuracy of a Classification Model</a><ul>
<liclass="toctree-l3"><aclass="reference internal" href="#Unvectorized-Solution">Unvectorized Solution</a></li>
<liclass="toctree-l3"><aclass="reference internal" href="#Vectorized-Solution">Vectorized Solution</a></li>
</ul>
</li>
<liclass="toctree-l2"><aclass="reference internal" href="Approximating_pi.html">Playing Darts and Estimating Pi</a></li>
</ul>
</li>
<liclass="toctree-l1"><aclass="reference internal" href="../../module_4.html">Module 4: Object Oriented Programming</a></li>
<liclass="toctree-l1"><aclass="reference internal" href="../../module_5.html">Module 5: Odds and Ends</a></li>
<liclass="toctree-l1"><aclass="reference internal" href="../../changes.html">Changelog</a></li>
</ul>
</div>
</div>
</nav>
<sectiondata-toggle="wy-nav-shift" class="wy-nav-content-wrap"><navclass="wy-nav-top" aria-label="Mobile navigation menu" >
<idata-toggle="wy-nav-top" class="fa fa-bars"></i>
<ahref="../../index.html">Python Like You Mean It</a>
</nav>
<divclass="wy-nav-content">
<divclass="rst-content">
<divrole="navigation" aria-label="Page navigation">
<ulclass="wy-breadcrumbs">
<li><ahref="../../index.html" class="icon icon-home"></a> »</li>
<li><ahref="../../module_3_problems.html">Module 3: Problems</a> »</li>
<li>Measuring the Accuracy of a Classification Model</li>
<liclass="wy-breadcrumbs-aside">
<ahref="../../_sources/Module3_IntroducingNumpy/Problems/ComputeAccuracy.md.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<divrole="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<divitemprop="articleBody">
<style>
/* CSS overrides for sphinx_rtd_theme */
/* 24px margin */
.nbinput.nblast.container,
.nboutput.nblast.container {
margin-bottom:19px; /* padding has already 5px */
}
/* ... except between code cells! */
.nblast.container+ .nbinput.container {
margin-top:-19px;
}
.admonition>p:before {
margin-right:4px; /* make room for the exclamation icon */
}
/* Fix math alignment, see https://github.com/rtfd/sphinx_rtd_theme/pull/686 */
.math {
text-align: unset;
}
</style>
<divclass="section" id="Measuring-the-Accuracy-of-a-Classification-Model">
<h1>Measuring the Accuracy of a Classification Model<aclass="headerlink" href="#Measuring-the-Accuracy-of-a-Classification-Model" title="Permalink to this headline"></a></h1>
<p>Suppose that we are working on a project in which we have some model that can process an image and classify its content. For example, my <codeclass="docutils literal notranslate"><spanclass="pre">cat_dog_goose_other</span></code> function tries to classify whether a picture is of a cat (class 0), a dog (class 1), a goose (class 2), or something else (class 3). We want to measure the <em>accuracy</em> of our classifier. That is, we want to feed it a series of images whose contents are known and tally the number of times the model’s prediction matches the true content of
an image. The accuracy is the fraction of images that the model classifies correctly.</p>
<p>For each image we feed the <codeclass="docutils literal notranslate"><spanclass="pre">cat_dog_goose_other</span></code> model, it will produce four <strong>scores</strong> - one score for each class. The model was designed such that the class with the highest score corresponds to its prediction. There are no constraints on the values the scores can take. For example, if the model processes one image it will return a shape-<spanclass="math notranslate nohighlight">\((1, 4)\)</span> score-array:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="gp">>>> </span><spanclass="n">scores</span><spanclass="o">=</span><spanclass="n">cat_dog_goose_other</span><spanclass="p">(</span><spanclass="n">image</span><spanclass="p">)</span>
<spanclass="go"># processing one image produces a 1x4 array of classification scores</span>
<spanclass="gp">>>> </span><spanclass="n">scores</span>
<spanclass="go">array([[-10, 33, 580, 100]])</span>
</pre></div>
</div>
<p>Here, our model has predicted that this is a picture of a goose, since the score associated with class 2 (<code class="docutils literal notranslate"><span class="pre">scores[0, 2]</span></code>) is the largest value. In general, if we pass <code class="docutils literal notranslate"><span class="pre">cat_dog_goose_other</span></code> an array of <span class="math notranslate nohighlight">\(N\)</span> images, it will return a shape-<span class="math notranslate nohighlight">\((N, 4)\)</span> array of classification scores - each of the <span class="math notranslate nohighlight">\(N\)</span> images has <span class="math notranslate nohighlight">\(4\)</span> scores associated with it.</p>
<p>Because we are measuring our model’s accuracy, we have curated a set of images whose contents are known. That is, we have a true <strong>label</strong> for each image, which is encoded as a class-ID. For example, a picture of a cat would have the label <code class="docutils literal notranslate"><span class="pre">0</span></code> associated with it, a picture of a dog would have the label <code class="docutils literal notranslate"><span class="pre">1</span></code>, and so on. Thus, a stack of <span class="math notranslate nohighlight">\(N\)</span> images would have associated with it a shape-<span class="math notranslate nohighlight">\((N,)\)</span> array of integer labels, where each label is an integer within <span class="math notranslate nohighlight">\([0, 4)\)</span>.</p>
<p>Suppose we have passed our model five images, and it produced the following scores:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="c1"># Classification scores produced by `cat_dog_goose_other`</span>
<spanclass="c1"># on five images. A shape-(5, 4) array.</span>
<spanclass="o">>>></span><spanclass="kn">import</span><spanclass="nn">numpy</span><spanclass="k">as</span><spanclass="nn">np</span>
<spanclass="o">>>></span><spanclass="n">scores</span><spanclass="o">=</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">array</span><spanclass="p">([[</span><spanclass="mi">30</span><spanclass="p">,</span><spanclass="mi">1</span><spanclass="p">,</span><spanclass="mi">10</span><spanclass="p">,</span><spanclass="mi">80</span><spanclass="p">],</span><spanclass="c1"># prediction: other</span>
<spanclass="o">...</span><spanclass="p">[</span><spanclass="o">-</span><spanclass="mi">10</span><spanclass="p">,</span><spanclass="mi">20</span><spanclass="p">,</span><spanclass="mi">0</span><spanclass="p">,</span><spanclass="o">-</span><spanclass="mi">5</span><spanclass="p">],</span><spanclass="c1"># prediction: dog</span>
<spanclass="o">...</span><spanclass="p">[</span><spanclass="mi">27</span><spanclass="p">,</span><spanclass="mi">50</span><spanclass="p">,</span><spanclass="mi">9</span><spanclass="p">,</span><spanclass="mi">30</span><spanclass="p">],</span><spanclass="c1"># prediction: dog</span>
<spanclass="o">...</span><spanclass="p">[</span><spanclass="o">-</span><spanclass="mi">1</span><spanclass="p">,</span><spanclass="mi">0</span><spanclass="p">,</span><spanclass="mi">84</span><spanclass="p">,</span><spanclass="mi">3</span><spanclass="p">],</span><spanclass="c1"># prediction: goose</span>
<spanclass="o">...</span><spanclass="p">[</span><spanclass="mi">5</span><spanclass="p">,</span><spanclass="mi">2</span><spanclass="p">,</span><spanclass="mi">10</span><spanclass="p">,</span><spanclass="mi">0</span><spanclass="p">]])</span><spanclass="c1"># prediction: goose</span>
</pre></div>
</div>
<p>And suppose that the true labels for these five images are:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="c1"># truth: cat, dog, dog, goose, other</span>
<spanclass="o">>>></span><spanclass="n">labels</span><spanclass="o">=</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">array</span><spanclass="p">([</span><spanclass="mi">0</span><spanclass="p">,</span><spanclass="mi">1</span><spanclass="p">,</span><spanclass="mi">1</span><spanclass="p">,</span><spanclass="mi">2</span><spanclass="p">,</span><spanclass="mi">3</span><spanclass="p">])</span>
</pre></div>
</div>
<p>Our model classified three out of five images correctly; thus, our accuracy function should return 0.6:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="gp">>>> </span><spanclass="n">classification_accuracy</span><spanclass="p">(</span><spanclass="n">scores</span><spanclass="p">,</span><spanclass="n">labels</span><spanclass="p">)</span>
<spanclass="go">0.6</span>
</pre></div>
</div>
<p>To generalize this problem, assume that your classifier is dealing with <spanclass="math notranslate nohighlight">\(K\)</span> classes (instead of <spanclass="math notranslate nohighlight">\(4\)</span>). Complete the following function.</p>
<p><strong>Tip:</strong> You will find it useful to leverage <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.argmax.html#numpy.argmax">numpy’s argmax function</a>.</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="k">def</span><spanclass="nf">classification_accuracy</span><spanclass="p">(</span><spanclass="n">classification_scores</span><spanclass="p">,</span><spanclass="n">true_labels</span><spanclass="p">):</span>
<spanclass="sd">"""</span>
<spanclass="sd"> Returns the fractional classification accuracy for a batch of N predictions.</span>
<spanclass="sd"> Parameters</span>
<spanclass="sd"> ----------</span>
<spanclass="sd"> classification_scores : numpy.ndarray, shape=(N, K)</span>
<spanclass="sd"> The scores for K classes, for a batch of N pieces of data</span>
<spanclass="sd"> (e.g. images).</span>
<spanclass="sd"> true_labels : numpy.ndarray, shape=(N,)</span>
<spanclass="sd"> The true label for each datum in the batch: each label is an</span>
<spanclass="sd"> integer in the domain [0, K).</span>
<spanclass="sd"> Returns</span>
<spanclass="sd"> -------</span>
<spanclass="sd"> float</span>
<spanclass="sd"> (num_correct) / N</span>
<spanclass="sd"> """</span>
<spanclass="c1"># YOUR CODE HERE</span>
<spanclass="k">pass</span>
</pre></div>
</div>
<divclass="section" id="Unvectorized-Solution">
<h2>Unvectorized Solution<aclass="headerlink" href="#Unvectorized-Solution" title="Permalink to this headline"></a></h2>
<p>A simple approach to this problem is to first loop over the rows of our classification scores. We know that each such row stores the scores for each class for a particular data point, and that the <em>index</em> of the highest score in that row gives us the predicted label for that data point (e.g. image in our hypothetical use-case). We can then directly compare these predicted labels with the true labels to compute the accuracy.</p>
<p>We can use the function <codeclass="docutils literal notranslate"><spanclass="pre">numpy.argmax</span></code> to get the index of the highest score, and thus the predicted class-ID, for each data point. Recall that NumPy arrays use <aclass="reference external" href="https://www.pythonlikeyoumeanit.com/Module3_IntroducingNumpy/ArrayTraversal.html#How-to-Traverse-an-Array:-Row-major-%28C%29-vs-Column-major-%28F%29-Traversal-Ordering">row-major traversal ordering</a>, so performing a for-loop over <codeclass="docutils literal notranslate"><spanclass="pre">classification_scores</span></code> will yield one row of the array at a time.</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="n">pred_labels</span><spanclass="o">=</span><spanclass="p">[]</span><spanclass="c1"># Will store the N predicted class-IDs</span>
<spanclass="k">for</span><spanclass="n">row</span><spanclass="ow">in</span><spanclass="n">classification_scores</span><spanclass="p">:</span>
<spanclass="c1"># store the index associated with the highest score for each datum</span>
<spanclass="n">pred_labels</span><spanclass="o">.</span><spanclass="n">append</span><spanclass="p">(</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">argmax</span><spanclass="p">(</span><spanclass="n">row</span><spanclass="p">))</span>
</pre></div>
</div>
<p>Next, we need to count the number of predicted class-IDs that match the true labels.</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="n">num_correct</span><spanclass="o">=</span><spanclass="mi">0</span>
<spanclass="k">for</span><spanclass="n">i</span><spanclass="ow">in</span><spanclass="nb">range</span><spanclass="p">(</span><spanclass="nb">len</span><spanclass="p">(</span><spanclass="n">pred_labels</span><spanclass="p">)):</span>
<spanclass="k">if</span><spanclass="n">pred_labels</span><spanclass="p">[</span><spanclass="n">i</span><spanclass="p">]</span><spanclass="o">==</span><spanclass="n">true_labels</span><spanclass="p">[</span><spanclass="n">i</span><spanclass="p">]:</span>
<spanclass="n">num_correct</span><spanclass="o">+=</span><spanclass="mi">1</span>
</pre></div>
</div>
<p>Or we can make use of <aclass="reference external" href="https://www.pythonlikeyoumeanit.com/Module2_EssentialsOfPython/Generators_and_Comprehensions.html#Creating-your-own-generator:-generator-comprehensions">a generator comprehension</a> and <aclass="reference external" href="https://www.pythonlikeyoumeanit.com/Module2_EssentialsOfPython/Itertools.html">itertools</a> to be much more succinct:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="c1"># recall: int(True) -> 1, int(False) -> 0</span>
<spanclass="n">num_correct</span><spanclass="o">=</span><spanclass="nb">sum</span><spanclass="p">(</span><spanclass="n">p</span><spanclass="o">==</span><spanclass="n">t</span><spanclass="k">for</span><spanclass="n">p</span><spanclass="p">,</span><spanclass="n">t</span><spanclass="ow">in</span><spanclass="nb">zip</span><spanclass="p">(</span><spanclass="n">pred_labels</span><spanclass="p">,</span><spanclass="n">true_labels</span><spanclass="p">))</span>
</pre></div>
</div>
<p>We can formally write this out into the following function:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="k">def</span><spanclass="nf">unvectorized_accuracy</span><spanclass="p">(</span><spanclass="n">classification_scores</span><spanclass="p">,</span><spanclass="n">true_labels</span><spanclass="p">):</span>
<spanclass="sd">"""</span>
<spanclass="sd"> Returns the fractional classification accuracy for a batch of N predictions.</span>
<spanclass="sd"> Parameters</span>
<spanclass="sd"> ----------</span>
<spanclass="sd"> classification_scores : numpy.ndarray, shape=(N, K)</span>
<spanclass="sd"> The scores for K classes, for a batch of N pieces of data</span>
<spanclass="sd"> (e.g. images).</span>
<spanclass="sd"> true_labels : numpy.ndarray, shape=(N,)</span>
<spanclass="sd"> The true label for each datum in the batch: each label is an</span>
<spanclass="sd"> integer in the domain [0, K).</span>
<spanclass="sd"> Returns</span>
<spanclass="sd"> -------</span>
<spanclass="sd"> float</span>
<spanclass="sd"> (num_correct) / N</span>
<spanclass="sd"> """</span>
<spanclass="n">pred_labels</span><spanclass="o">=</span><spanclass="p">[]</span><spanclass="c1"># Will store the N predicted class-IDs</span>
<spanclass="k">for</span><spanclass="n">row</span><spanclass="ow">in</span><spanclass="n">classification_scores</span><spanclass="p">:</span>
<spanclass="n">pred_labels</span><spanclass="o">.</span><spanclass="n">append</span><spanclass="p">(</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">argmax</span><spanclass="p">(</span><spanclass="n">row</span><spanclass="p">))</span>
<spanclass="n">num_correct</span><spanclass="o">=</span><spanclass="mi">0</span>
<spanclass="k">for</span><spanclass="n">i</span><spanclass="ow">in</span><spanclass="nb">range</span><spanclass="p">(</span><spanclass="nb">len</span><spanclass="p">(</span><spanclass="n">pred_labels</span><spanclass="p">)):</span>
<spanclass="k">if</span><spanclass="n">pred_labels</span><spanclass="p">[</span><spanclass="n">i</span><spanclass="p">]</span><spanclass="o">==</span><spanclass="n">true_labels</span><spanclass="p">[</span><spanclass="n">i</span><spanclass="p">]:</span>
<spanclass="n">num_correct</span><spanclass="o">+=</span><spanclass="mi">1</span>
<spanclass="k">return</span><spanclass="n">num_correct</span><spanclass="o">/</span><spanclass="nb">len</span><spanclass="p">(</span><spanclass="n">true_labels</span><spanclass="p">)</span>
</pre></div>
</div>
<p>Testing against our example from above:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="gp">>>> </span><spanclass="n">unvectorized_accuracy</span><spanclass="p">(</span><spanclass="n">scores</span><spanclass="p">,</span><spanclass="n">labels</span><spanclass="p">)</span>
<spanclass="go">0.6</span>
</pre></div>
</div>
<p>Hooray! We have a working accuracy function! However, this function can be greatly simplified and optimized by <a class="reference external" href="https://www.pythonlikeyoumeanit.com/Module3_IntroducingNumpy/VectorizedOperations.html">vectorizing</a> it.</p>
</div>
<divclass="section" id="Vectorized-Solution">
<h2>Vectorized Solution<aclass="headerlink" href="#Vectorized-Solution" title="Permalink to this headline"></a></h2>
<p><codeclass="docutils literal notranslate"><spanclass="pre">numpy.argmax</span></code> is one of NumPy’s <aclass="reference external" href="https://www.pythonlikeyoumeanit.com/Module3_IntroducingNumpy/VectorizedOperations.html#Sequential-Functions">vectorized sequential functions</a>. As such, it accepts <aclass="reference external" href="https://www.pythonlikeyoumeanit.com/Module3_IntroducingNumpy/VectorizedOperations.html#Specifying-the-axis-Keyword-Argument-in-Sequential-NumPy-Functions">axis as a keyword argument</a>. This means that, instead of calling <codeclass="docutils literal notranslate"><spanclass="pre">np.argmax</span></code> on each row of <codeclass="docutils literal notranslate"><spanclass="pre">classification_scores</span></code> in a for-loop, we
can simply instruct <codeclass="docutils literal notranslate"><spanclass="pre">np.argmax</span></code> to operate <em>across the columns of each row of the array</em> by specifying <codeclass="docutils literal notranslate"><spanclass="pre">axis=1</span></code>.</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="c1"># returns the column-index of the max value</span>
<spanclass="c1"># within each row of `classification_scores`</span>
<spanclass="n">pred_labels</span><spanclass="o">=</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">argmax</span><spanclass="p">(</span><spanclass="n">classification_scores</span><spanclass="p">,</span><spanclass="n">axis</span><spanclass="o">=</span><spanclass="mi">1</span><spanclass="p">)</span>
</pre></div>
</div>
<p>This simple expression eliminates our first for-loop entirely.</p>
<p>Next, we can use NumPy’s <em>vectorized logical operations</em>, specifically <codeclass="docutils literal notranslate"><spanclass="pre">==</span></code>, to get a boolean-valued array that stores <codeclass="docutils literal notranslate"><spanclass="pre">True</span></code> wherever the predicted labels match the true labels and <codeclass="docutils literal notranslate"><spanclass="pre">False</span></code> everywhere else. Recall that <codeclass="docutils literal notranslate"><spanclass="pre">True</span></code> behaves like <codeclass="docutils literal notranslate"><spanclass="pre">1</span></code> and <codeclass="docutils literal notranslate"><spanclass="pre">False</span></code> like <codeclass="docutils literal notranslate"><spanclass="pre">0</span></code>. Thus, we can call <codeclass="docutils literal notranslate"><spanclass="pre">np.mean</span></code> on our resulting boolean-valued array to compute the number of correct predictions divided by the total number of predictions. We can thus vectorize our second for-loop with:</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="c1"># computes the fraction of correctly predicted labels</span>
<spanclass="n">frac_correct</span><spanclass="o">=</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">mean</span><spanclass="p">(</span><spanclass="n">pred_labels</span><spanclass="o">==</span><spanclass="n">true_labels</span><spanclass="p">)</span>
</pre></div>
</div>
<p>All together, making keen use of vectorization allows us to write our classification accuracy function <em>in a single line of code</em>.</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="k">def</span><spanclass="nf">classification_accuracy</span><spanclass="p">(</span><spanclass="n">classification_scores</span><spanclass="p">,</span><spanclass="n">true_labels</span><spanclass="p">):</span>
<spanclass="sd">"""</span>
<spanclass="sd"> Returns the fractional classification accuracy for a batch of N predictions.</span>
<spanclass="sd"> Parameters</span>
<spanclass="sd"> ----------</span>
<spanclass="sd"> classification_scores : numpy.ndarray, shape=(N, K)</span>
<spanclass="sd"> The scores for K classes, for a batch of N pieces of data</span>
<spanclass="sd"> (e.g. images).</span>
<spanclass="sd"> true_labels : numpy.ndarray, shape=(N,)</span>
<spanclass="sd"> The true label for each datum in the batch: each label is an</span>
<spanclass="sd"> integer in the domain [0, K).</span>
<spanclass="sd"> Returns</span>
<spanclass="sd"> -------</span>
<spanclass="sd"> float</span>
<spanclass="sd"> (num_correct) / N</span>
<spanclass="sd"> """</span>
<spanclass="k">return</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">mean</span><spanclass="p">(</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">argmax</span><spanclass="p">(</span><spanclass="n">classification_scores</span><spanclass="p">,</span><spanclass="n">axis</span><spanclass="o">=</span><spanclass="mi">1</span><spanclass="p">)</span><spanclass="o">==</span><spanclass="n">true_labels</span><spanclass="p">)</span>
</pre></div>
</div>
<p>Not only is this cleaner to look at, but it was also simpler and less error-prone to write. Moreover, it is much faster than our unvectorized solution - given <span class="math notranslate nohighlight">\(N=10,000\)</span> data points and <span class="math notranslate nohighlight">\(K=100\)</span> classes, our vectorized solution is roughly <span class="math notranslate nohighlight">\(25\times\)</span> faster, as the timings below show.</p>
<p>(The following “time-it” code blocks must be run in independent cells in a Jupyter notebook or IPython console - <codeclass="docutils literal notranslate"><spanclass="pre">%%timeit</span></code> must be the topmost command in the cell)</p>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="gp">>>> </span><spanclass="n">N</span><spanclass="o">=</span><spanclass="mi">10000</span>
<spanclass="gp">>>> </span><spanclass="n">K</span><spanclass="o">=</span><spanclass="mi">100</span>
<spanclass="gp">>>> </span><spanclass="n">scores</span><spanclass="o">=</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">random</span><spanclass="o">.</span><spanclass="n">rand</span><spanclass="p">(</span><spanclass="n">N</span><spanclass="p">,</span><spanclass="n">K</span><spanclass="p">)</span>
<spanclass="gp">>>> </span><spanclass="n">labels</span><spanclass="o">=</span><spanclass="n">np</span><spanclass="o">.</span><spanclass="n">random</span><spanclass="o">.</span><spanclass="n">randint</span><spanclass="p">(</span><spanclass="n">low</span><spanclass="o">=</span><spanclass="mi">0</span><spanclass="p">,</span><spanclass="n">high</span><spanclass="o">=</span><spanclass="n">K</span><spanclass="p">,</span><spanclass="n">size</span><spanclass="o">=</span><spanclass="n">N</span><spanclass="p">)</span>
</pre></div>
</div>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="gp">>>> </span><spanclass="o">%%</span><spanclass="n">timeit</span>
<spanclass="gp">... </span><spanclass="n">unvectorized_accuracy</span><spanclass="p">(</span><spanclass="n">scores</span><spanclass="p">,</span><spanclass="n">labels</span><spanclass="p">)</span>
<spanclass="go">39.5 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)</span>
</pre></div>
</div>
<divclass="highlight-python notranslate"><divclass="highlight"><pre><span></span><spanclass="gp">>>> </span><spanclass="o">%%</span><spanclass="n">timeit</span>
<spanclass="gp">... </span><spanclass="n">classification_accuracy</span><spanclass="p">(</span><spanclass="n">scores</span><spanclass="p">,</span><spanclass="n">labels</span><spanclass="p">)</span>
<spanclass="go">1.6 ms ± 7.04 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)</span>
</pre></div>
</div>
</div>
</div>
</div>
</div>
<footer><divclass="rst-footer-buttons" role="navigation" aria-label="Footer">
<ahref="../../module_3_problems.html" class="btn btn-neutral float-left" title="Module 3: Problems" accesskey="p" rel="prev"><spanclass="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<ahref="Approximating_pi.html" class="btn btn-neutral float-right" title="Playing Darts and Estimating Pi" accesskey="n" rel="next">Next <spanclass="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<divrole="contentinfo">
<p>© Copyright 2021, Ryan Soklaski.</p>
</div>
Built with <ahref="https://www.sphinx-doc.org/">Sphinx</a> using a
<ahref="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <ahref="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function(){
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>