\documentclass[conference]{IEEEtran}
%DIF LATEXDIFF DIFFERENCE FILE
\IEEEoverridecommandlockouts
\usepackage{cite}
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{graphicx}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{algorithm}
\usepackage{algpseudocode}
% Emad added the packages below
% \usepackage{todonotes}
\usepackage[inline]{enumitem}
\usepackage{listings}
\usepackage[switch]{lineno}
\lstset{
numbers=right,
stepnumber=1,
firstnumber=1,
numberfirstline=true
}
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF
%DIF UNDERLINE PREAMBLE %DIF PREAMBLE
\RequirePackage[normalem]{ulem} %DIF PREAMBLE
\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} %DIF PREAMBLE
\providecommand{\DIFadd}[1]{{\protect\color{blue}\uwave{#1}}} %DIF PREAMBLE
\providecommand{\DIFdel}[1]{{\protect\color{red}\sout{#1}}} %DIF PREAMBLE
%DIF SAFE PREAMBLE %DIF PREAMBLE
\providecommand{\DIFaddbegin}{} %DIF PREAMBLE
\providecommand{\DIFaddend}{} %DIF PREAMBLE
\providecommand{\DIFdelbegin}{} %DIF PREAMBLE
\providecommand{\DIFdelend}{} %DIF PREAMBLE
%DIF FLOATSAFE PREAMBLE %DIF PREAMBLE
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}} %DIF PREAMBLE
\providecommand{\DIFdelFL}[1]{\DIFdel{#1}} %DIF PREAMBLE
\providecommand{\DIFaddbeginFL}{} %DIF PREAMBLE
\providecommand{\DIFaddendFL}{} %DIF PREAMBLE
\providecommand{\DIFdelbeginFL}{} %DIF PREAMBLE
\providecommand{\DIFdelendFL}{} %DIF PREAMBLE
\newcommand{\DIFscaledelfig}{0.5}
%DIF HIGHLIGHTGRAPHICS PREAMBLE %DIF PREAMBLE
\RequirePackage{settobox} %DIF PREAMBLE
\RequirePackage{letltxmacro} %DIF PREAMBLE
\newsavebox{\DIFdelgraphicsbox} %DIF PREAMBLE
\newlength{\DIFdelgraphicswidth} %DIF PREAMBLE
\newlength{\DIFdelgraphicsheight} %DIF PREAMBLE
% store original definition of \includegraphics %DIF PREAMBLE
\LetLtxMacro{\DIFOincludegraphics}{\includegraphics} %DIF PREAMBLE
\newcommand{\DIFaddincludegraphics}[2][]{{\color{blue}\fbox{\DIFOincludegraphics[#1]{#2}}}} %DIF PREAMBLE
\newcommand{\DIFdelincludegraphics}[2][]{% %DIF PREAMBLE
\sbox{\DIFdelgraphicsbox}{\DIFOincludegraphics[#1]{#2}}% %DIF PREAMBLE
\settoboxwidth{\DIFdelgraphicswidth}{\DIFdelgraphicsbox} %DIF PREAMBLE
\settoboxtotalheight{\DIFdelgraphicsheight}{\DIFdelgraphicsbox} %DIF PREAMBLE
\scalebox{\DIFscaledelfig}{% %DIF PREAMBLE
\parbox[b]{\DIFdelgraphicswidth}{\usebox{\DIFdelgraphicsbox}\\[-\baselineskip] \rule{\DIFdelgraphicswidth}{0em}}\llap{\resizebox{\DIFdelgraphicswidth}{\DIFdelgraphicsheight}{% %DIF PREAMBLE
\setlength{\unitlength}{\DIFdelgraphicswidth}% %DIF PREAMBLE
\begin{picture}(1,1)% %DIF PREAMBLE
\thicklines\linethickness{2pt} %DIF PREAMBLE
{\color[rgb]{1,0,0}\put(0,0){\framebox(1,1){}}}% %DIF PREAMBLE
{\color[rgb]{1,0,0}\put(0,0){\line( 1,1){1}}}% %DIF PREAMBLE
{\color[rgb]{1,0,0}\put(0,1){\line(1,-1){1}}}% %DIF PREAMBLE
\end{picture}% %DIF PREAMBLE
}\hspace*{3pt}}} %DIF PREAMBLE
} %DIF PREAMBLE
\LetLtxMacro{\DIFOaddbegin}{\DIFaddbegin} %DIF PREAMBLE
\LetLtxMacro{\DIFOaddend}{\DIFaddend} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelbegin}{\DIFdelbegin} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelend}{\DIFdelend} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddbegin}{\DIFOaddbegin \let\includegraphics\DIFaddincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddend}{\DIFOaddend \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelbegin}{\DIFOdelbegin \let\includegraphics\DIFdelincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelend}{\DIFOaddend \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
\LetLtxMacro{\DIFOaddbeginFL}{\DIFaddbeginFL} %DIF PREAMBLE
\LetLtxMacro{\DIFOaddendFL}{\DIFaddendFL} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelbeginFL}{\DIFdelbeginFL} %DIF PREAMBLE
\LetLtxMacro{\DIFOdelendFL}{\DIFdelendFL} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddbeginFL}{\DIFOaddbeginFL \let\includegraphics\DIFaddincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFaddendFL}{\DIFOaddendFL \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelbeginFL}{\DIFOdelbeginFL \let\includegraphics\DIFdelincludegraphics} %DIF PREAMBLE
\DeclareRobustCommand{\DIFdelendFL}{\DIFOaddendFL \let\includegraphics\DIFOincludegraphics} %DIF PREAMBLE
%DIF LISTINGS PREAMBLE %DIF PREAMBLE
\RequirePackage{listings} %DIF PREAMBLE
\RequirePackage{color} %DIF PREAMBLE
\lstdefinelanguage{DIFcode}{ %DIF PREAMBLE
%DIF DIFCODE_UNDERLINE %DIF PREAMBLE
moredelim=[il][\color{red}\sout]{\%DIF\ <\ }, %DIF PREAMBLE
moredelim=[il][\color{blue}\uwave]{\%DIF\ >\ } %DIF PREAMBLE
} %DIF PREAMBLE
\lstdefinestyle{DIFverbatimstyle}{ %DIF PREAMBLE
language=DIFcode, %DIF PREAMBLE
basicstyle=\ttfamily, %DIF PREAMBLE
columns=fullflexible, %DIF PREAMBLE
keepspaces=true %DIF PREAMBLE
} %DIF PREAMBLE
\lstnewenvironment{DIFverbatim}{\lstset{style=DIFverbatimstyle}}{} %DIF PREAMBLE
\lstnewenvironment{DIFverbatim*}{\lstset{style=DIFverbatimstyle,showspaces=true}}{} %DIF PREAMBLE
%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF
\begin{document}
\title{Find Unique Usages: Helping Developers Understand Common Usages}
\author{\DIFdelbegin %DIFDELCMD < \IEEEauthorblockN{1\textsuperscript{st} Given Name Surname}
%DIFDELCMD < \IEEEauthorblockA{\textit{dept. name of organization (of Aff.)} \\
%DIFDELCMD < \textit{name of organization (of Aff.)}\\
%DIFDELCMD < City, Country \\
%DIFDELCMD < email address}
%DIFDELCMD < %%%
\DIFdelend
\DIFaddbegin \IEEEauthorblockN{Emad Aghayi}
\IEEEauthorblockA{\textit{Department of Computer Science} \\
\textit{George Mason University}\\
Fairfax, VA \\
\DIFaddend \and
\DIFdelbegin %DIFDELCMD < \IEEEauthorblockN{2\textsuperscript{nd} Given Name Surname}
%DIFDELCMD < \IEEEauthorblockA{\textit{dept. name of organization (of Aff.)} \\
%DIFDELCMD < \textit{name of organization (of Aff.)}\\
%DIFDELCMD < City, Country \\
%DIFDELCMD < email address}
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \IEEEauthorblockN{Aaron Massey}
\IEEEauthorblockA{\textit{Department of Computer Science} \\
\textit{George Mason University}\\
Fairfax, VA \\
\DIFaddend \and
\DIFdelbegin %DIFDELCMD < \IEEEauthorblockN{3\textsuperscript{rd} Given Name Surname}
%DIFDELCMD < \IEEEauthorblockA{\textit{dept. name of organization (of Aff.)} \\
%DIFDELCMD < \textit{name of organization (of Aff.)}\\
%DIFDELCMD < City, Country \\
%DIFDELCMD < email address}
%DIFDELCMD <
%DIFDELCMD < %%%
\DIFdelend \DIFaddbegin \IEEEauthorblockN{Thomas D. LaToza}
\IEEEauthorblockA{\textit{Department of Computer Science} \\
\textit{George Mason University}\\
Fairfax, VA \\
\DIFaddend }
%DIF < \author{
%DIF < \IEEEauthorblockN{Emad Aghayi}
%DIF < \IEEEauthorblockA{\textit{Department Of computer Science} \\
%DIF < \textit{George Mason University}\\
%DIF < Fairfax, VA \\
%DIF < [email protected]}
%DIF < \and
%DIF < \IEEEauthorblockN{Aaron Massey}
%DIF < \IEEEauthorblockA{\textit{Department Of computer Science} \\
%DIF < \textit{George Mason University}\\
%DIF < Fairfax, VA \\
%DIF < [email protected]}
%DIF < \and
%DIF < \IEEEauthorblockN{Thomas LaToza}
%DIF < \IEEEauthorblockA{\textit{Department Of computer Science} \\
%DIF < \textit{George Mason University}\\
%DIF < Fairfax, VA \\
%DIF < [email protected]}
%DIF < % \and
%DIF < % \IEEEauthorblockN{3\textsuperscript{rd} Given Name Surname}
%DIF < % \IEEEauthorblockA{\textit{Department Of computer Science} \\
%DIF < % \textit{George Mason University}\\
%DIF < % Fairfax, VA \\
%DIF < % email address or ORCID}
%DIF < }
\DIFdelbegin %DIFDELCMD <
%DIFDELCMD < %%%
\DIFdelend \maketitle
\begin{abstract}
When working in large and complex codebases, developers face challenges using \textit{Find Usages} to understand how to reuse classes and methods. To better understand these challenges, we conducted a small exploratory study with 4 participants. We found that developers often wasted time reading long lists of similar usages or prematurely focused on a single usage. Based on these findings, we hypothesized that clustering usages by the similarity of their surrounding context might enable developers to more rapidly understand how to use a function. To explore this idea, we designed and implemented \textit{Find Unique Usages}, which extracts usages, computes a diff between pairs of usages, generates similarity scores, and uses these scores to form usage clusters. To evaluate this approach, we conducted a controlled experiment with 12 participants. We found that developers with \textit{Find Unique Usages} were significantly faster, completing their task in 35\% less time.
\end{abstract}
%DIF < Aaron: "The results of the think-aloud6experiment highlighted that developers spend considerable time7learning to use internal code artifacts." This might be too strong of a statement. What we found is that given only access to the source code of the artifacts and examples on its usage within a codebase, there was some difficulty or at least time spent.
\DIFdelbegin %DIFDELCMD <
%DIFDELCMD < %%%
\DIFdelend \begin{IEEEkeywords}
Software reuse, code navigation, programming tools, development environments
\end{IEEEkeywords}
% ***********************************************Introduction***************************************
\section{Introduction}
When developers look to reuse existing functionality in their codebase by interacting with classes and calling methods, developers work to understand existing code. Developers report understanding existing code is one of their most time-consuming activities~\cite{latoza2006maintaining}. Developers generally avoid relying on documentation, which is frequently out of date and poorly written~\cite{documentation} and may not answer low-level questions about hidden contracts, implementation details, or side effects~\cite{head2018not}. Instead, developers tend to rely primarily on the code itself~\cite{head2018not, latoza2006maintaining}. \DIFdelbegin \DIFdel{Developers frequently searchfor code examples~\mbox{%DIFAUXCMD
\cite{brandt2009two,sadowski2015developers} }\hspace{0pt}%DIFAUXCMD
and }\DIFdelend \DIFaddbegin \DIFadd{The most frequent developer activity is code search~\mbox{%DIFAUXCMD
\cite{singer2010examination,brandt2009two,sadowski2015developers}}\hspace{0pt}%DIFAUXCMD
. 50\% of developers search for code frequently and 39\% search occasionally~\mbox{%DIFAUXCMD
\cite{sim2011well}}\hspace{0pt}%DIFAUXCMD
, and 92\% of developers searched when they were working on maintenance tasks~\mbox{%DIFAUXCMD
\cite{lawrance2008using}}\hspace{0pt}%DIFAUXCMD
. Therefore, developers }\DIFaddend spend a significant amount of time navigating and searching existing code~\cite{piorkowski2016foraging,ko2006exploratory}. \DIFaddbegin \DIFadd{Given the centrality of code search to understanding how to reuse functionality within a codebase, even small improvements in making this process more successful and effective may have an important impact.
}\DIFaddend
A variety of tools assist developers in navigating and searching code~\cite{augustine2015field,ko2006exploratory,albusays2017interviews}. Modern development environments offer tools to \textit{Find Usages} or navigate the \textit{Call Hierarchy}. For example, the JetBrains IDE offers developers a window which lists results of all usages of a class or method. To understand how to use a method, a developer may simply read each invocation of a method. In this way, these IDE tools are envisioned to support the process of code reuse and help developers to identify and understand the ways in which methods are used.
%of Eclipse that Provided this ability for developers. One utilizing example of this tool is that when developers write or edit code, they might come across code elements that they want to change or delete. Before they make changes, they look where the code element is used and how it affects the codebase. Although these tools are the most popular, they have many unaddressed challenges. In large codebases, similar usages are frequently happen. Because of that in this kind of codebases, they are facing a high number of duplicate usages.\par
%After they found the example, they are trying to understand it. Developers trying to find usage of that example in different location of the codebase. As a next step, developers often, after finding and understanding the code example, copy and paste the code then change it. A filed study showed developers on average in one hour make four copy and paste code (code clone)~\cite{kim2004ethnographic}. Before they make changes in the cloned code, developers look where the code element is used and how it affects the codebase. In this step developer for addressing their concerns, they again need tools to search and navigate in the codebase.\par
% Another daily issue is navigation from one statement other to task-related code. Sometimes developers want to go through related statements to understand the codebase. Also, they are facing challenges to answer questions about how a change affects callers. When developers want to change an artifact before changing that they check the side effect of that change. They go through the codebase and evaluate the effect of change on them. There exist multiple approaches for better support of structural navigation.\par
% During programming tasks, it is common for a developer to use an object or routine they were previously unfamiliar with. A programmer could also be unfamiliar with how a resource is used within a codebase versus the resource itself. Typically, programmers scour the web for documentation or examples~\cite{brandt2009two,parnin2011measuring}. However, this is not an option for closed source code, forcing developers to rely on internal documentation and examples. Since this may be non-existent or worse (incorrect), example code is a more reliable information source for programmers. The industry-standard approach for finding examples is the \textit{Find Usages}/References tool. However, when large numbers of references are returned from this tool, it can be not very easy for users to parse the results visually .\par
% Developers, when they write or edit code, they might come across code element that they want to change or delete. Before they make changes, they look where the code element is used and how it affects the codebase. \textit{Find Usages} tool in Jetbrains IDEs and \textit{Open Call Hierarchy} tool of Eclipse Provided this ability for developers. Results of \textit{Find Usages} are listed in a window in Jetbrains IDEs that developers are going through the results to find the desired usage or understand the codebase. In large codebases similar usages is more possible, because of that in this kind of codebases they are facing with with high number of duplicate usages.\par
To investigate the challenges developers may face in using \textit{Find Usages} to reuse code, we conducted a small exploratory study where we observed four developers implementing a feature in an open source codebase. We found that developers sometimes became overwhelmed by the number of usage results listed. Instead of investigating more usages, participants focused on a single usage as \DIFdelbegin \DIFdel{as }\DIFdelend an example for some time before moving on to investigate other, potentially better, examples. As a result, participants learned less from code examples than they could have.
Based on these findings, we hypothesized that, rather than listing all usages separately, developers might be able to more rapidly understand how to use a function through a usages view organized into groups based on the similarity of the surrounding code. \par
To explore this idea, we propose \textit{Find Unique Usages}, which clusters usages by similarity and displays usage clusters to the developer. \textit{Find Unique Usages} extracts usages of elements, computes a diff between pairs of usages, generates similarity scores, and uses these scores to form usage clusters.
To evaluate \textit{Find Unique Usages}, we conducted an experiment where 12 developers implemented a small feature in an open-source codebase. We found that developers with \textit{Find Unique Usages} were significantly faster, completing their task in 35\% less time.\par
In the rest of this paper, we first review related work. We then describe our exploratory study of how developers use \textit{Find Usages}. Based on our findings, we then present the design of \textit{Find Unique Usages} and an evaluation of its use. Finally, we conclude with a discussion of limitations as well as opportunities and future directions.
\begin{figure*}
\centering
\includegraphics [width=\textwidth,keepaspectratio,clip]{figures/Both_tools}
\caption{Using the traditional \textit{Find Usages} tool, 31 usages are listed based on the package where they are located. In \textit{Find Unique Usages}, these 31 usages are clustered into three clusters, offering developers a summarized view of usages.}
\label{fig:compare}
\end{figure*}
% ***********************************************Related Work***************************************
\section{Related Work}
Our work builds on prior studies of how developers work to understand code and tools to help developers in understanding code.
In their daily tasks, developers collect information from peers, code, documentation, and other resources~\cite{latoza2006maintaining}. Information foraging theory~\cite{pirolli1999informationforaging} has been used to describe how software developers search for information in code~\cite{fleming2013information}. According to this theory, developers try to maximize the value of information they expect to obtain and minimize the cost of navigation. When developers choose where to look, they estimate the expected value of information they find in that location and the cost of finding it. Half of navigation choices lead to less value than what developers expected~\cite{piorkowski2016foraging}. \par
Understanding how developers navigate while foraging for information is important. Developers spend 35\% to 50\% of their time navigating through source code during software development activities~\cite{ko2006exploratory,piorkowski2013whats}. Navigating and re-finding places in code that have already been visited is frequent, difficult, and distracting~\cite{ko2005eliciting,deline2005towards}. Many tools try to help developers optimize code navigation. Structural relationship traversal tools let developers traverse relationships between code elements to find other related code~\cite{karrer2011stacksplorer,augustine2015field,latoza2011visualizing}. Recommender tools predict relevant elements based on the history generated when developers worked on similar tasks~\cite{zimmermann2005mining,deline2005easing}. Task context navigation tools make it easier to navigate back and forth between task context elements~\cite{ko2006exploratory}. However, code navigation remains challenging for developers~\cite{albusays2017interviews}.\par
One reason developers forage for information is to find code examples~\cite{rosson1996reuse, brandt2009two}. Developers look for examples of how to use specific methods or objects~\cite{stylos2006mica,umarji2008archetypal}. Opportunistic developers are more likely than systematic developers to use example code~\cite{head2018not}. A number of techniques exist to support code reuse. For instance, IDEs have search tools that enable developers to look for example code.\par
Developers often reuse existing code in their codebase to complete their tasks. A common way to edit code is by copying similar existing code, creating a code clone~\cite{codeCloneDetection2019,hou2009cnp}. Developers create code clones to apply basic templates, apply design patterns, or reuse the definition of specific behavior~\cite{kim2004ethnographic,kapser2008cloning}. Code clones are in many cases harmful, but there may be some situations where code clones are beneficial. Many tools detect harmful code clones in codebases~\cite{bellon2007comparison}. \DIFaddbegin \DIFadd{Our tool is novel in adapting ideas from code clone detection to the problem of summarizing usage sites; existing code clone detectors instead focus on identifying duplicated code as a code smell to be refactored.}\DIFaddend \par
Our work builds on these past studies and tools, contributing both a study of how developers work with the \textit{Find Usages} tool and a novel technique that reduces the information overload developers face when using it.
\begin{figure*}[h]
\centering
\includegraphics [width=15cm,height=10cm,keepaspectratio,clip]{figures/challenge}
\caption{Developers face challenges in working with the \textit{Find Usages} tool. In this example, the results list 31 usages of the ``initCapacity'' method. Developers were forced to go through these 31 results.
%and also use a pen and paper to find the possible inputs for this method.
While many provided the same input values (e.g., 10), discovering this was annoying and time-consuming for participants.
}
\label{fig:usege}
\end{figure*}
% ***********************************************Exploratory Study***************************************
\section{Study 1: Challenges with Find Usages}
To better understand how developers use \textit{Find Usages} and the challenges they face, we conducted a formative observational study in which 4 participants worked to implement a feature in an unfamiliar codebase.
\subsection{Method}
We recruited four participants (P1, P2, P3, P4), two female and two male. The first participant \DIFaddbegin \DIFadd{(P1) }\DIFaddend was a graduate computer science student at our institution and participated as a pilot participant. The remaining three participants worked as software developers in industry. Two participants had less than a year of industry experience \DIFaddbegin \DIFadd{(P2 and P4)}\DIFaddend , and the third had more than four years of experience \DIFaddbegin \DIFadd{(P3)}\DIFaddend .\par
%We designed two tasks for the study. The first task was training task it supposed to train participant how to use \textit{Find Usages} tool. The second task was implementing a logic in a codebase. We collected data from the interaction of developers while they were working on both tasks. \par
To ensure participants were familiar with the \textit{Find Usages} tool, participants first completed a training task. Three of the four participants were previously unaware of \textit{Find Usages}. %The goal of training task was training participants to be representative of all real world developers, regardless of if they are familiar with it or not.
Participants worked in the Google Guava project, a 772,475 LOC open-source library written in Java. To focus participants on familiarizing themselves with \textit{Find Usages}, we asked participants to qualitatively describe the range of integer inputs to two methods used in the codebase. Participants were given a maximum of 10 minutes to reach the point where they had become comfortable with the \textit{Find Usages} tool. \par
To observe developers working with \textit{Find Usages} in a more realistic programming task, the main task asked participants to implement a feature. Participants worked on the FlyingSaucer project, a pure Java library of approximately 99,000 LOC for rendering XML, XHTML, and CSS. We removed the statements that create a PDF and asked participants to implement functionality to produce ``success.pdf''. To focus participants' attention on the code, participants were instructed to treat the codebase as closed source and not to look for online documentation or code examples. Participants were free to choose any IDE they wished to accomplish the task.
During the study, participants first completed the training task before beginning the main task.
Participants were asked to think-aloud as they worked.
At the end of the study, participants completed a survey about their experiences. We asked participants to share challenges they experienced in 1) finding a method, variable, or other element, 2) beginning work, 3) understanding and working with the provided codebase, and 4) challenges related to their productivity. The study lasted approximately 50 minutes for each participant.
We piloted our initial study design with the graduate student participant (P1). We found the tasks to be appropriate and clarified the instructions. The other participants reported that the tasks were similar to what they did in their job in industry.
%\begin{quote}
%"Not out of the ordinary [from regular industry/work experience] but still a bit overwhelming"- (P2)
%\end{quote}
\begin{quote} "This is just like my job" and "I do this at work all the time, no one knows how anything works but we see how things are used"
- (P3)
\end{quote}
\begin{figure*}
\centering
\includegraphics [width=\textwidth,keepaspectratio,clip]{figures/GeneralView2.pdf}
\caption{\textit{Find Unique Usages} computes usage clusters through four steps. It first collects usages and creates an AST for each usage. It then builds a diff between each pair of usages. From these diffs, it then calculates a similarity score between each pair. Finally, it clusters usages based on their similarity scores, using a threshold to determine whether to assign each usage to an existing cluster or create a new one.}
\label{fig:generalview}
\end{figure*}
\subsection{Results}
\DIFaddbegin \subsubsection{\DIFadd{Quantitative results}}\DIFaddend All three participants successfully completed the main task, working for 49, 20, and 42 minutes, respectively. We examined their activity to understand their use of \textit{Find Usages} and the challenges they faced.
%One of them used the term "implemented" to mean "usage" frequently.
\DIFaddbegin \subsubsection{\DIFadd{Qualitative results}}\DIFaddend Participants were often exploratory in their work, making decisions without exhaustively gathering evidence.
%In some cases, participants did not have a strong evidence or reason for understanding the codebase. They were trying to use their feeling.
\begin{quote}"I do not know how .layout() is used but I am going to use it because it's in this code" - (P2) \end{quote}
\begin{quote}"I feel like this [.layout()] should just work, I hope!" - (P4) \end{quote}
\noindent Two participants felt overwhelmed by the size of the codebase.
\begin{quote}"Overwhelmed by the amount of code." - (P2) \end{quote}
\begin{quote}"Large codebases are my biggest fear." - (P4) \end{quote}
\DIFdelbegin \DIFdel{Participants }\DIFdelend \DIFaddbegin \DIFadd{Two participants }\DIFaddend used the unit tests included in the project \DIFdelbegin \DIFdel{to }\DIFdelend as examples to determine how methods are typically invoked and what the possible input arguments for those methods may be. Participants often stuck with a specific test case, and did not go looking for others.
\begin{quote} "Tests are a good example of uses." - (P3)\end{quote}
The similarity of \textit{Find Usages} results was a significant challenge for participants. When many results were found (e.g., Fig.~\ref{fig:usege}), it was challenging for participants\DIFaddbegin \DIFadd{ to identify useful information.
}\DIFaddend %to identify useful information in understanding the method or object.
Much \DIFaddbegin \DIFadd{of the }\DIFaddend code looked similar, and it was difficult for participants to observe differences. This was particularly challenging when usages were complex and participants had to navigate call graphs.
\begin{quote}"There were a ton of methods and usages that were really similar and it was a lot to put together"- (P4)\end{quote}
\begin{quote}"I had difficulty finding usages with low complexity of calls and uses"- (P4)\end{quote}
Participants often scrolled quickly through usages when the surrounding code did not \DIFdelbegin \DIFdel{making }\DIFdelend \DIFaddbegin \DIFadd{make }\DIFaddend calls similar to those \DIFdelbegin \DIFdel{she }\DIFdelend \DIFaddbegin \DIFadd{they }\DIFaddend wished to make. \DIFdelbegin \DIFdel{Participants }\DIFdelend \DIFaddbegin \DIFadd{All participants }\DIFaddend sometimes navigated to usages \DIFdelbegin \DIFdel{, which }\DIFdelend \DIFaddbegin \DIFadd{that }\DIFaddend did not help them complete the task and wasted time.
Surprisingly, participants tended to focus on the first usage. By default, IntelliJ IDEA highlights the first usage in the results window. Participants copied the first usage, pasted it where they wished to reuse the functionality, and adapted it as needed.\par
%The confusing thing about \textit{Find Usages} is happen when a method is overloaded.
\DIFdelbegin \DIFdel{Participants }\DIFdelend \DIFaddbegin \DIFadd{One participant }\DIFaddend had difficulty working with \textit{Find Usages} in the presence of overloaded methods. When they found usages where the types or number of input arguments were different, they navigated between them to understand why there were different implementations of a method.
% ***********************************************System***************************************
\section{Find Unique Usages Tool}
The formative study revealed that developers face challenges working with \textit{Find Usages} result lists containing many highly similar usages. We hypothesized that clustering similar usages might help developers understand usages more quickly and easily.
%We brainstormed on the results and came up with the idea that the refining result of \textit{Find Usages} might help developers and understand results quicker. In order to design a better tool for addressing the issues, we must understand what is going on in the developers' world and understand how our tool can make the developers. Therefore, we summarized research findings into storyboards. After that, we designed a tool for IntelliJ IDEA that refines the result of the regular \textit{Find Usages} tool. The tool aggregates the results and shows the only relevant results. Relevant results likely depend on a measure of sameness. A screenshot of the tool is depicted on Fig.\ref{fig:compare}. \par
To explore this hypothesis, we designed \textit{Find Unique Usages} (Fig.~\ref{fig:compare}).
\textit{Find Unique Usages} clusters usages based on each usage's surrounding code and displays the clusters to the user. To identify similar usages, we use Gumtree Spoon AST Diff~\cite{falleri2014fine} to calculate a diff between each usage site and then iteratively cluster usages. Fig.~\ref{fig:generalview} offers an overview of our approach. \textit{Find Unique Usages} computes usage clusters through four steps:
1) collecting raw usages and creating ASTs for each usage, 2) calculating a diff between the ASTs of each pair of usages, 3) using the diffs to calculate a similarity score for each pair of usages, 4) iteratively clustering usages based on the similarity scores. \textit{Find Unique Usages} is implemented as an IntelliJ plugin.
% *********************************************************************User Interface************************************
%\textbf{Backend of Fuind Unique Usage:} Backend of \textit{Find Unique Usages} handle all logic of collecting usages and clustering them .Usages were clustered based on similar code within the usage's containing method code block. For every usage, IntelliJ IDEA calls our aggregate usage method, which returns a reference to a particular usage cluster. An internal IntelliJ IDEA object represents clusters of usages. The back-end engine compares the given usage with all other usages in all existing usage clusters. The back-end engine consider the given usage to be a part of a usage cluster when it has a guaranteed minimum threshold of similarity with every abstract syntax tree (AST) associated with that usage cluster. If all the usage clusters have minimum similarities that do not meet the threshold, then back-end creates a new usage cluster with the supplied usage and then return that usage cluster. Logic of the back-end is depicted in Fig.~\ref{fig:flowchart}. \par
%\begin{figure}
% \centering
% \includegraphics [width=\columnwidth,keepaspectratio, clip]{figures/flowchart2}
% \caption{Flowchart shows how the back-end of \textit{Find Unique Usages} work. }
%\label{fig:flowchart}
%\end{figure}
%The backend of \textit{Find Unique Usages} has 4 main parts,
\subsection{Collecting Usages and Building ASTs}
In the first step, \textit{Find Unique Usages} collects usages from the codebase. For each usage, it generates an AST, which includes the entire body of the method containing the usage. The list of usage statements is retrieved through the IntelliJ API. IntelliJ is then used to generate an AST for each usage.
%It also has another API for accessing any Program Structure Interface (PSI) elements. It gives a tree for any element of the codebase. We passed usages one by one as an element to the API. Then the API provides a tree for that element. Traversing the tree give information about parents or child of each usage. The backend by traversing trees for each of the code blocks that usages are placed creates an AST.
\par
In our initial design, we considered only the usage statement itself and did not consider any of the code surrounding the usage. In preliminary tests of our tool, we found that the resulting clusters remained similar to the output of \textit{Find Usages}. We thus increased the scope of a usage to the entire method body containing it.
%We tried to find the similarity among trees (code block around usages) instead of leaf nodes (usage statements) of ASTs. Therefore, we increased the scope and consider the entire body of the method that usage places in as a sub-tree. Our approach creates an AST from all elements there exist in the body of the method.
\par
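To illustrate this step, the sketch below collects the usages of a method and widens each usage to its containing method body. It is a minimal sketch assuming the standard IntelliJ Platform PSI APIs (\texttt{ReferencesSearch}, \texttt{PsiTreeUtil}); the class and method names are illustrative rather than the plugin's exact implementation.
\begin{lstlisting}[language=Java, caption={Sketch of usage collection via the IntelliJ PSI APIs (illustrative; not the plugin's exact code).}]
import com.intellij.psi.PsiElement;
import com.intellij.psi.PsiMethod;
import com.intellij.psi.PsiReference;
import com.intellij.psi.search.searches.ReferencesSearch;
import com.intellij.psi.util.PsiTreeUtil;
import java.util.ArrayList;
import java.util.List;

public class UsageCollector {
    // Returns the enclosing method (a PSI subtree) for every usage of the target method.
    public List<PsiMethod> collectUsageContexts(PsiMethod target) {
        List<PsiMethod> contexts = new ArrayList<>();
        for (PsiReference reference : ReferencesSearch.search(target).findAll()) {
            PsiElement usage = reference.getElement();
            // Widen the scope from the usage statement to its entire containing method.
            PsiMethod enclosing = PsiTreeUtil.getParentOfType(usage, PsiMethod.class);
            if (enclosing != null) {
                contexts.add(enclosing);
            }
        }
        return contexts;
    }
}
\end{lstlisting}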
%\begin{lstlisting}[language=Java, caption=An example is shown in the below. If a developer gets Find Unique Usage of the \textbf{add} method. Our approach creates an AST for usage in line 2. The AST root is \textbf{add} which it has two children line 2 and line 3. Our approach creates an AST from all elements there exist in the body of the method.]
% public void callSum(){
% int theSum = add(1, 3);
% System.out.print(theSum);
% }
% public int add(int value1,int value2){
% return value1 + value2;
% }
%\end{lstlisting}
\subsection{Building Diffs of ASTs}
Using the ASTs of each usage, \textit{Find Unique Usages} computes a diff of each pair of usages. In an early prototype, we computed the difference between ASTs using string edit distance, which we found worked poorly. We instead adopted the GumTree algorithm~\cite{baxter1998clone,DBLP:conf/kbse/FalleriMBMM14,falleri2014fine}, which computes a structure-aware diff of each pair of method bodies. GumTree was initially created to support source code management (SCM) tools, while we adapt it to cluster code. \par
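As a concrete illustration, the sketch below diffs the source of two usage contexts with the gumtree-spoon-ast-diff library and counts the resulting edit operations; the entry point shown (\texttt{AstComparator.compare}) comes from the library's public API, while the surrounding class is illustrative and the plugin's actual integration may differ.
\begin{lstlisting}[language=Java, caption={Sketch of diffing two usage contexts with gumtree-spoon-ast-diff (illustrative).}]
import gumtree.spoon.AstComparator;
import gumtree.spoon.diff.Diff;

public class UsageDiffer {
    // Diffs the Java source of two usage contexts and returns the number of
    // root edit operations (insertions, deletions, updates, and moves).
    public int editOperationCount(String usageSource1, String usageSource2) throws Exception {
        Diff diff = new AstComparator().compare(usageSource1, usageSource2);
        return diff.getRootOperations().size();
    }
}
\end{lstlisting}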
\subsection{Scoring Similarity}
Rather than identify usages which constitute an exact match or count only the number of nodes which vary in the diff, we adapted an approach from prior work for computing similarity~\cite{baxter1998clone}.
%The similarity used a threshold that specifies how similar to trees should be.
Similarity is computed as:
\begin{equation}
Similarity = \frac{2 \times Shared}{2 \times Shared + AST1 + AST2}
\label{equation1}
\end{equation}
\noindent where $Shared$ is the number of shared nodes between two trees calculated by GumTree, $AST1$ is the number of nodes which differ in usage 1 and $AST2$ is the number of nodes which differ in usage 2.
Similarity scores are calculated for all pairs of usages.
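The computation itself is a direct transcription of Equation~(\ref{equation1}); the helper below is illustrative, with the node counts assumed to come from the GumTree diff of the previous step.
\begin{lstlisting}[language=Java, caption={Similarity score of Equation (1) (illustrative helper).}]
public final class SimilarityScore {
    // sharedNodes: AST nodes matched between the two usages by GumTree.
    // uniqueToUsage1 / uniqueToUsage2: nodes appearing in only one of the two usages.
    public static double similarity(int sharedNodes, int uniqueToUsage1, int uniqueToUsage2) {
        return (2.0 * sharedNodes) / (2.0 * sharedNodes + uniqueToUsage1 + uniqueToUsage2);
    }
}
\end{lstlisting}
For example, two usages sharing 100 nodes, with 10 and 15 nodes unique to each, score $200/225 \approx 0.89$, just above the similarity threshold described below.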
\subsection{Clustering Usages}
\textit{Find Unique Usages} next computes clusters of usages based on their similarity scores using a max-of-min algorithm. For each usage, it first computes the minimum similarity between that usage and the members of each existing cluster, and memoizes these values. It then chooses the cluster whose minimum similarity is largest. To do so, it uses two algorithms: one to find the minimum similarity between a usage and the members of a cluster, and another to find the best cluster.\par
Algorithm 1 finds the minimum similarity between a usage to be clustered and the members of a cluster.\par
\begin{algorithm}
\label{algo1}
\caption{Minimum Similarity in a Usage cluster - minSimilarity($x$, $G_{i}$)}
\begin{algorithmic}[1]
\State Given a Usage $x$ and a usage cluster $G_{i}$
% \If{$G_{i}$ = null}
% \State return $-\infty$
% \EndIf
\State $minSimilarity$ $\leftarrow$ $\infty$
\For{each usage $u_{i}$ in $G_{i}$}
\State $minSimilarity$$\leftarrow$min($minSimilarity$,similarity($x$,$u_{i}$)) \EndFor
\State return $minSimilarity$
\end{algorithmic}
\
\end{algorithm}
Next, Algorithm 2 is used to find the most similar cluster for the usage. This algorithm iterates over all clusters, using Algorithm 1 to score each one. It then uses a similarity \textit{threshold} to determine whether the most similar cluster is sufficiently similar. The threshold controls the number of clusters that will be created, with higher thresholds generating more, smaller clusters~\cite{deng2013top}.
%. For example if it choose threshold \textgreater 90\% means very similar, \textgreater 70\% means similar, \textless 40\% means not so similar. A small threshold involves many different usages, and a large threshold might lead to a few results~\cite{deng2013top}.
We chose a similarity threshold of approximately 88\% after informally experimenting with a number of examples. The algorithm then uses this threshold to determine whether a usage should be placed in a new cluster or added to the most similar cluster.
\begin{algorithm}
\label{algo2}
\caption{Find Corresponding Usage cluster}
\begin{algorithmic}[1]
\State // Given a Usage $x$ \& a set of Usage Clusters $G$
\State $mostSimilarCluster$ $\leftarrow$ $null$
\For{each Usage Cluster $G_{i}$ in $G$}
\If{minSimilarity($x$,$mostSimilarCluster$)$<$ minSimilarity($x$, $G_{i}$)}
\State $mostSimilarCluster$ $\leftarrow$ $G_{i}$
\EndIf
\EndFor
\State // Given some similarity threshold $T$
\If{minSimilarity($x$, $mostSimilarCluster$) $<$ $T$}
\State // Create new usage cluster and modify $G$.
\State $G_{new}$ $\leftarrow$ newUsageClusterWithInitialMember($x$)
\State $G$ $\leftarrow$ $G$ $\cup$ \{$G_{new}$\}
% \State return $G_{new}$
\Else
\State addToCluster($mostSimilarCluster$, $x$)
% \State return $mostSimilarcluster$
\EndIf
\end{algorithmic}
\end{algorithm}
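A compact Java sketch of Algorithms 1 and 2 combined is shown below; the \texttt{Usage} interface and the class and method names are illustrative, with pairwise similarity assumed to be computed as in Equation~(\ref{equation1}).
\begin{lstlisting}[language=Java, caption={Sketch of the max-of-min clustering of Algorithms 1 and 2 (illustrative).}]
import java.util.ArrayList;
import java.util.List;

public class UsageClusterer {
    // Illustrative stand-in for a usage whose pairwise similarity is already computable.
    public interface Usage {
        double similarityTo(Usage other);
    }

    private static final double THRESHOLD = 0.88; // similarity threshold T

    // Assigns each usage to the most similar existing cluster, or starts a new one.
    public List<List<Usage>> cluster(List<Usage> usages) {
        List<List<Usage>> clusters = new ArrayList<>();
        for (Usage usage : usages) {
            List<Usage> best = null;
            double bestMinSimilarity = Double.NEGATIVE_INFINITY;
            for (List<Usage> candidate : clusters) {    // Algorithm 2: scan all clusters
                double minSim = minSimilarity(usage, candidate);
                if (minSim > bestMinSimilarity) {
                    bestMinSimilarity = minSim;
                    best = candidate;
                }
            }
            if (best == null || bestMinSimilarity < THRESHOLD) {
                List<Usage> created = new ArrayList<>(); // below threshold: new cluster
                created.add(usage);
                clusters.add(created);
            } else {
                best.add(usage);                         // otherwise join the best cluster
            }
        }
        return clusters;
    }

    // Algorithm 1: minimum pairwise similarity between a usage and a cluster's members.
    private double minSimilarity(Usage x, List<Usage> cluster) {
        double min = Double.POSITIVE_INFINITY;
        for (Usage member : cluster) {
            min = Math.min(min, x.similarityTo(member));
        }
        return min;
    }
}
\end{lstlisting}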
\subsection{User Interface}
As adopting unfamiliar tools may impose an additional burden on developers~\cite{adaption2002}, we implemented \textit{Find Unique Usages} by extending the existing \textit{Find Usages} interface in the IntelliJ IDEA (Fig. \ref{fig:compare}).
Our interface reorganizes the list view, adding usage clusters containing usages with similar code. %As each user cluster contains usage statements surrounded by similar code, we hypothesized the usages would be in similar contexts, implying more information about the usage. Thus user could inspect one usage within a usage cluster and gleam that the other usages within that cluster were similar and move on to another usage cluster depending on their task. The task we associate this tool with is finding and parsing a large variety of usages while seeking examples of an internal API.\par
One key design choice was whether to present results in an inline pane or separate window. We chose to present usages in a separate window to enable more detailed presentation of results. \par
% ***************Evaluation************************
\section{Study 2: Evaluation}
To evaluate \textit{Find Unique Usages}, we conducted a between subjects controlled experiment in which 12 participants worked to add a feature to a codebase.
\subsection{Method}
We recruited 13 participants by advertising on social networks. One participant with insufficient Java experience left the study early. We excluded this participant, yielding 12 participants in our study, C1 to C6 and E1 to E6.
%of the participants left the study in the middle. This participant had three years of experience in Java, but that experience was a while ago. Because of that, he could not remember the syntax of Java and left the study. We exclude the result of that participant from the analysis.
33\% of participants were female and 67\% were male. Five participants were graduate students, four worked as software developers, and three were undergraduates. Participants in both conditions had comparable levels of Java experience, with 60 months for control and 62 months for experimental participants. All participants were volunteers.\par
Participants were randomly assigned to a control or experimental condition. \DIFaddbegin \DIFadd{Two of the undergraduates were in the experimental condition, and one was in the control condition. }\DIFaddend All participants used the IntelliJ IDEA. Control participants used the regular \textit{Find Usages} tool and experimental participants used \textit{Find Unique Usages}.
Participants completed the same tasks as tasks in the exploratory study.
At the end of the study, we conducted a semi-structured interview and asked participants about their experiences using the \textit{Find Unique Usages} and \textit{Find Usages} tools.
\subsection{Results}
\DIFaddbegin \subsubsection{\DIFadd{Task completion time}}
\DIFaddend All participants in both conditions successfully completed the task. Experimental participants completed the task in significantly less time (t = 1.82, p = 0.049), finishing in \DIFdelbegin \DIFdel{23.33 }\DIFdelend \DIFaddbegin \DIFadd{21.5 }\DIFaddend minutes (SD = 7.65) compared to \DIFdelbegin \DIFdel{32.33 }\DIFdelend \DIFaddbegin \DIFadd{33 }\DIFaddend minutes for control participants (SD = 9.35). \DIFaddbegin \DIFadd{The effect size is 1.24 for Glass's delta and 1.36 for Cohen's d, indicating that the mean of the experimental group lies at roughly the 90th percentile of the control group.
}\DIFaddend
%in a median of 21.5 minutes, compared to 33 minutes in the control condition. The 6 participants who worked with \textit{Find Unique Usages} (M = 23.33, SD = 7.65) compared to the 6 participants in the control group (M = 32.33, SD = 9.35) need significantly lower time for completion tasks. The t-value is 1.82366. The p-value is 0.049094. The result is significant at p \textless 0.05.\par
%A qualitative data the behaviors of developers might be useful for understanding the challenges and benefits of our tool. We observed the behaviors of developers and labeled them to find a pattern from their activities while they were working on the task. The below behaviors are shared between both the control and experimental groups. The reason that they had many common behaviors was the user interface for our tool and the regular \textit{Find Usages} tool was similar.\par
\DIFdelbegin \DIFdel{Participants }\DIFdelend \DIFaddbegin \subsubsection{\DIFadd{Interacting with lists of results}} \DIFadd{All participants in both groups }\DIFaddend read the results of usages sequentially. They began from the first result in the list and proceeded further.
The \textit{Find Usages} view of IntelliJ IDEA supports this behavior by expanding and highlighting the first usage.
However, \DIFdelbegin \DIFdel{most participants did not }\DIFdelend \DIFaddbegin \DIFadd{none of the participants }\DIFaddend read all of the usage results.
More successful participants used a specific strategy. They first used \textit{Find Usages} together with the \textit{Find In Path} tool to understand the codebase. Before they started reading usages, they expanded and skimmed the list of usages. From this, they selected the usage that seemed most likely to help them. \par
Usages listed in the results window were easier to read when they contained literals directly in the call site rather than referencing variables or expressions defined elsewhere. When they did contain variables, developers were forced to open the class containing the usage to read and understand it. \par
\DIFaddbegin \subsubsection{\DIFadd{Navigating between the list of results and the codebase}}\DIFaddend Almost all participants cycled through the following steps.
Participants first clicked on a usage that had an object as an input argument. Next, they went to the class defining the object. They scrolled over the class's methods and read the code of the class. Some, but not all, tried to understand that class and its methods by invoking \textit{Find Usages}. Several participants then reported being lost and experienced difficulty understanding the class. In either case, participants then returned to the first usage. As they had invoked another \textit{Find Usages} command, they spent time remembering where they were and re-invoking the first command they began with. In this way, participants cycled through the steps of
selecting a usage, navigating to the class defining the usage, invoking another \textit{Find Usages} to understand usages of this code, reading code, and returning to the starting point. \par
\DIFdelbegin \DIFdel{Participants }\DIFdelend \DIFaddbegin \DIFadd{All participants }\DIFaddend experienced challenges making recursive use of \textit{Find Usages}. Participants selected a usage, opened the class containing that usage, and again invoked \textit{Find Usages} on other methods. After invoking \textit{Find Usages} several times, they sometimes lost their place in the call graph and became disoriented. \par
%Thus they left the usage and came back to the list of results. After that, they move to another usage.
In both conditions, \DIFdelbegin \DIFdel{almost all }\DIFdelend \DIFaddbegin \DIFadd{four }\DIFaddend participants struggled with information overload. For example, in their task, they needed to use a method that was overloaded. This led to methods that had the same name, but had a different number of input parameters, types of input parameters, or both. When participants invoked \textit{Find Usages}, this confused them.\par
\DIFaddbegin \subsubsection{\DIFadd{Usability issues}}
\DIFaddend At the end of the study, we interviewed participants about their experiences. Participants reported a number of usability issues. One participant reported that recursive use of \textit{Find Usages} was confusing.
\begin{quote} "I was getting lost when I was using nested [several sequential] \textit{Find Usages} for understand codebase." - (C6)\end{quote}
\noindent C6 and E3 experienced a usability issue with the number showing the line of the usage statement in the file, which they instead interpreted as the frequency of the repeating statement.
E2 and E4 found the use of the same name for all clusters in \textit{Find Unique Usages} to be confusing (see Fig.~\ref{fig:compare}).
C6 specifically requested support for combining \textit{Find Usages} with call graph navigation to better support recursively investigating usages. During the study, he got lost and felt that a call graph navigation tool would help him stay oriented more easily.\par
%It was confusing for 2 of participants (E2 and E4). They suggested us to change the naming convention. Since the name of all clusters was similar (see Figure.~\ref{fig:compare}) they were curious how we clustered the usages.\par
%One participants (C6) recursively applied \textit{Find Usages}, navigating more than one level across the call graph, and reported feeling lost. The participant reported that he felt that supporting more than one level of call navigation was important.
IntelliJ IDEA offers an inline list of \textit{Find Usages}, invoked by pressing control on the keyboard and clicking on a method. In this approach, developers do not go to a separate results window. The two participants that discovered this feature, one each from the control and experimental group, completed the task in the two shortest times (17 and 13 minutes).
%One contorl participant used this approach, completing the task 17 minutes. Seventeen minutes was the second minimum of time among the completion time of all participants. Another developer that used this approach was from the experimental group. He completed the task in 13 minutes, which was the lowest among 12 participants. \par
%Also, we asked them to give us any suggestion that might be useful. Participants gave us a couple of suggestions. The interesting one was C6 told us if they have Call Graphs in combination with \textit{Find Usages} might be useful. He said Call Graph is a useful tool for understanding large codebases, and I am using Call Graph for understanding them. He told us, the Call Graph helps to get an abstract view over methods. Also, he said in this way, developers ignore investigation on some methods and spend most of their time focusing on the basic methods. While C6 was working on his tasks, he called several sequential \textit{Find Usages} for understanding the code. Then he said he has got lost. At the end of the study, he said if he had had a Call Graph, probably he would not have got lost in usages.
% *************************Limitation********************************************************************
\section{\DIFdelbegin \DIFdel{Limitation }\DIFdelend \DIFaddbegin \DIFadd{Limitations }\DIFaddend And Threats To Validity}
Like all studies, our study has several important limitations and potential threats to validity.
Unlike most developers, our participants had no experience with the codebase in which they worked. Developers with more knowledge might navigate less and rely more on their existing code knowledge, resulting in different interactions with \textit{Find Usages}.
Developers also did not benefit from documentation or the ability to ask coworkers questions, which might \DIFaddbegin \DIFadd{have }\DIFaddend changed their information needs and the strategies they used to satisfy them.
To ensure participants were familiar with \textit{Find Usages}, we trained them in its use, enabling a comparison between use of \textit{Find Usages} and \textit{Find Unique Usages}. But participants may not have been familiar with other alternative navigational aids, which they might have instead used. In choosing the codebase in which participants worked, we sought to identify a representative Java codebase of medium size. But codebases with usage sites which differed, such as by containing more or fewer literals or overloaded methods, might have led to different results.
% *************************Discussion********************************************************************
\section{Discussion}
Our exploratory study demonstrated that developers face challenges in using \textit{Find Usages}. Developers faced challenges with the high number of similar usages, often focusing on the first usage without consideration of the rest. To help developers more easily survey the set of usages, we developed a new technique to help developers find unique usages. Our approach utilizes AST diffs of usage contexts to cluster similar usages. We found that developers with \textit{Find Unique Usages} were able to complete a task implementing simple logic in an unfamiliar codebase in 35\% less time.
Only one participant surveyed all usages before beginning to read them. By default, both \textit{Find Unique Usages} and \textit{Find Usages} in IntelliJ expand and highlight the first usage in the list. This may be one reason many developers choose to focus on the first usage. It is unclear how developers' behavior might change if the IDE did not highlight this first usage. An alternative design might be to expand all usages by default and not highlight any usage.
This design would be similar to the inline \textit{Find Usages} view IntelliJ offers, which provides only a list of usages and hides other complexity. \par
%Only one participants before starting reading usages expand all behaviors. He could complete the task in rank of 4th among 12 participants, that expanding might helped him.\par
\textit{Find Unique Usages} is very sensitive to the selected threshold, which determines the number of clusters created. A more systematic investigation might be used to understand what number of clusters best helps developers identify distinct usages without being overwhelmed by similar ones. Alternatively, the user might be given control to increase or decrease the number of usage clusters they see.
More broadly, many questions remain about just what makes a usage distinct for a developer and how developers might wish to understand these usage clusters.\DIFaddbegin \par
\DIFaddend
\DIFaddbegin \DIFadd{More sophisticated clustering approaches exist, but we chose to apply a simple one. Rather than focus on designing the best possible clustering approach, we wanted to first demonstrate initial evidence that clustering can help when applied to the problem of summarizing code usage sites. We thus focused our attention on the interaction design and on studying how developers used the usage clusters we created. We believe that our initial results offer evidence for the promise of this approach, and our findings point to opportunities for adopting more sophisticated clustering approaches in future work.}\par
\DIFaddend Our results underscore the importance of call graph navigation tools. Rather than simply \DIFdelbegin \DIFdel{look }\DIFdelend \DIFaddbegin \DIFadd{looking }\DIFaddend at usages one level deep, developers often wished to understand usages by going deeper. A wide variety of call graph navigation tools have been designed to support this behavior, beginning with tools such as the call hierarchy and extending to more sophisticated research tools such as StackSplorer~\cite{karrer2011stacksplorer}, Reacher~\cite{latoza2011visualizing}, and Prodet~\cite{augustine2015field}. Our results suggest that these tools may help support developers in understanding method usages. Hybrid tools might also be envisioned, combining a view showing clustered usages with the ability to follow paths through the call graph.
\DIFaddbegin \DIFadd{Finding that developers are overwhelmed by too many irrelevant search results, and that reducing these results helps, is not surprising. While the usability issues we identified are, in retrospect, obvious, they remain a pervasive, everyday challenge for developers, and bringing attention to them is thus important. In addition, our findings offer insights for designing tools that successfully address these issues, such as identifying what information developers gather when manually inspecting usage sites.
}
\DIFaddend %This should either be picked based on trying out different thresholds on different projects or probably better, allowing it to be configured by a menu option as part of the IDE plugin. Choosing the similarity threshold here for describing similar code, is also familiar to choosing a K when doing K-means, which depends highly on the data-set that K-means is applied to. Picking a reasonable default threshold would be future work, but for now, the best approach would probably be to set it as an option via a menu.
%The approach that we used for clustering the usage was a naive approach to clustering that we took due to time constraints preventing us from better understanding the IntelliJ SDK. In the future, we hope to re-implement the usage clustering with merging as part of an agglomerate clustering approach. Also, We can work on scalability and response time of the tool \par
%A couple of participants complaint about the naming convention of clusters. The names of the clusters was similar. In Fig.~\ref{fig:compare} labels of clusters are shown. In future, we must change it to more readable names.\par
% *************************Conclusion********************************************************************
%\section{Conclusion}
%Developers are overwhelmed with duplicate usages results when they are working with Find Usage tool on large codebases to understand the code. We designed and developed a tool that aggregates similar usages. Results of the showed clustering usages based on similarity is useful for developers.
%DIF < \section*{Acknowledgment}
%DIF < % [TODO]: mention Dr. Jon Bell. He helped us in first ideation part of the paper.
%DIF < acknowledgments in the unnumbered footnote on the first page.
\DIFaddbegin \section*{\DIFadd{Acknowledgment}}
\DIFadd{We thank Jon Bell for his contributions to the early brainstorming of this research, the participants in our studies for their participation, and our colleagues in CS795 of Fall '19.
This research was supported in part by the National Science Foundation under grants CCF-1414197 and CCF-1845508.
}\DIFaddend
\bibliographystyle{IEEEtran}
\bibliography{FUU}
\end{document}