-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreport.bib
More file actions
561 lines (511 loc) · 18.6 KB
/
report.bib
File metadata and controls
561 lines (511 loc) · 18.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
% DAC '09 paper by Levitan and Chiarulli on massively parallel processing.
% The accented phrase is written with BibTeX special characters and is
% brace-protected so that styles cannot change its case.
@inproceedings{massiveparallel,
  author    = {Levitan, Steven P. and Chiarulli, Donald M.},
  title     = {Massively Parallel Processing: It's {D{\'e}j{\`a} Vu} All over Again},
  booktitle = {Proceedings of the 46th Annual Design Automation Conference},
  series    = {DAC '09},
  year      = {2009},
  pages     = {534--538},
  numpages  = {5},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {San Francisco, California},
  isbn      = {978-1-60558-497-3},
  keywords  = {massively parallel processing, multicore, parallel architectures and algorithms},
}
% Journal article on how communication and synchronization affect Amdahl's law
% in multicore systems. The country is listed once in the address, and the
% proper noun in the title is protected from sentence-casing.
@article{amdahlmulticore,
  author     = {Yavits, L. and Morad, A. and Ginosar, R.},
  title      = {The Effect of Communication and Synchronization on {Amdahl's} Law in Multicore Systems},
  journal    = {Parallel Computing},
  issue_date = {January, 2014},
  volume     = {40},
  number     = {1},
  month      = jan,
  year       = {2014},
  pages      = {1--16},
  numpages   = {16},
  issn       = {0167-8191},
  publisher  = {Elsevier Science Publishers B. V.},
  address    = {Amsterdam, The Netherlands},
  keywords   = {Amdahl's law, Analytical Performance Models, Multicore},
}
% One-page item (numpages is 1) on shared caches in multicores.
% A single page is given as a page number rather than a degenerate range.
@article{sharedcaches,
  author     = {Irwin, Mary Jane},
  title      = {Shared Caches in Multicores: The Good, the Bad, and the Ugly},
  journal    = {SIGARCH Comput. Archit. News},
  issue_date = {June 2010},
  volume     = {38},
  number     = {3},
  month      = jun,
  year       = {2010},
  pages      = {234},
  numpages   = {1},
  issn       = {0163-5964},
  acmid      = {1815990},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {caches, multicore},
}
% SC '07 paper on optimizing sparse matrix-vector multiplication for multicore platforms.
@inproceedings{emergingmulticore,
  author    = {Williams, Samuel and Oliker, Leonid and Vuduc, Richard and Shalf, John and Yelick, Katherine and Demmel, James},
  title     = {Optimization of Sparse Matrix-vector Multiplication on Emerging Multicore Platforms},
  booktitle = {Proceedings of the 2007 ACM/IEEE Conference on Supercomputing},
  series    = {SC '07},
  pages     = {38:1--38:12},
  articleno = {38},
  numpages  = {12},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Reno, Nevada},
  isbn      = {978-1-59593-764-3}
}
% Article on the three sigma rule for unimodal densities.
% The sigma in the abstract is written in math mode so the file stays ASCII
% like its other entries, which use LaTeX escapes for accents.
@article{threesigmas,
  author    = {Pukelsheim, Friedrich},
  title     = {The Three Sigma Rule},
  journal   = {The American Statistician},
  volume    = {48},
  number    = {2},
  pages     = {88--91},
  year      = {1994},
  publisher = {American Statistical Association},
  doi       = {10.2307/2684253},
  abstract  = {For random variables with a unimodal Lebesgue density, the 3$\sigma$ rule is proved by elementary calculus. It emerges as a special case of the {Vysochanski\u{i}-Petunin} inequality, which in turn is based on the Gauss inequality.},
  keywords  = {distribution\_theory, statistics, tas},
  posted-at = {2008-05-02 13:21:11},
  priority  = {2}
}
% Conference paper (PDPTA'15) describing the Hotspot framework.
% Typed as a conference paper with the venue in booktitle instead of journal;
% authors use the unambiguous "Last, First" form; only the tool name is
% case-protected in the title.
@inproceedings{hotspot-paper,
  author    = {Tinetti, Fernando and More, Andres},
  title     = {{Hotspot}: A Framework to Support Performance Optimization on Multiprocessors},
  booktitle = {PDPTA'15: The 2015 International Conference on Parallel and Distributed Processing Techniques and Applications},
  year      = {2015}
}
% Conference paper contrasting BLAS kernel implementations.
% Only the acronym is brace-protected in the title; the Spanish venue name
% carries its accent as a LaTeX escape.
@inproceedings{lessons,
  author    = {More, Andres},
  title     = {Lessons Learned from Contrasting {BLAS} Kernel Implementations},
  booktitle = {XVIII Congreso Argentino de Ciencias de la Computaci{\'o}n},
  year      = {2013}
}
% Book on programming the Intel Xeon Phi coprocessor.
% Edition is an ordinal word, as classic BibTeX styles expect; the product
% name is protected from case changes.
@book{xeonphi,
  author    = {Jeffers, James and Reinders, James},
  title     = {{Intel Xeon Phi} Coprocessor High Performance Programming},
  edition   = {First},
  year      = {2013},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  isbn      = {9780124104143, 9780124104945},
}
% Technical report: case study on high performance matrix multiplication.
% The author is a person, so the name is given as "Last, First" rather than
% as a double-braced literal, which would be treated as one indivisible surname.
@techreport{mm-tool,
  author      = {More, Andres},
  title       = {A Case Study on High Performance Matrix Multiplication},
  institution = {Universidad Nacional de La Plata},
  year        = {2008},
  url         = {https://code.google.com/p/mm-matrixmultiplicationtool/},
}
% Book on parallel programming by McKenney, with kernel.org given as publisher.
@book{is-parallel-programming-hard,
  author    = {McKenney, Paul E.},
  title     = {Is Parallel Programming Hard, And, If So, What Can You Do About It?},
  year      = {2010},
  publisher = {kernel.org},
  url       = {http://kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html}
}
% Conference paper introducing Beowulf.
% The booktitle no longer begins with a literal "In": bibliography styles
% add that word themselves, so it would otherwise be printed twice.
@inproceedings{beowulf-parallel-workstation,
  author    = {Thomas Sterling and Donald J. Becker and Daniel Savarese and John E. Dorband and Udaya A. Ranawake and Charles V. Packer},
  title     = {Beowulf: A Parallel Workstation For Scientific Computation},
  booktitle = {Proceedings of the 24th International Conference on Parallel Processing},
  year      = {1995},
  pages     = {11--14},
  publisher = {CRC Press}
}
% The MPI standard document.
% The author is an organisation, so the name is double-braced to stop BibTeX
% from splitting it into first and last names.
@techreport{mpi-standard,
  author      = {{Message Passing Interface Forum}},
  title       = {{MPI}: A {Message-Passing} Interface Standard},
  institution = {University of Tennessee},
  month       = may,
  year        = {1994}
}
% OpenMP API specification, edition 3.0, with a corporate author.
@manual{openmp-api,
  author  = {{OpenMP Architecture Review Board}},
  title   = {{OpenMP} Application Program Interface},
  edition = {3.0},
  year    = {2008},
  month   = may,
  url     = {http://www.openmp.org}
}
% Report on parallel computing over local area networks (title in Spanish).
% The personal author uses "Last, First" form instead of a braced literal, and
% the accented letter in the title is a LaTeX escape so the file stays ASCII.
@techreport{tinetti,
  author      = {Tinetti, Fernando G.},
  title       = {{C{\'o}mputo Paralelo en Redes Locales de Computadoras}},
  institution = {Universidad Nacional de La Plata},
  year        = {2004}
}
% Article describing gprof, a call graph execution profiler.
@article{gprof,
  author    = {Graham, Susan L. and Kessler, Peter B. and McKusick, Marshall K.},
  title     = {gprof: a call graph execution profiler},
  journal   = {SIGPLAN Not.},
  year      = {2004},
  month     = apr,
  volume    = {39},
  number    = {4},
  pages     = {49--57},
  numpages  = {9},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
% Hennessy and Patterson's computer architecture textbook, third edition.
% Edition is an ordinal word, as classic BibTeX styles expect, and the title
% is stored in title case so styles can down-case it if they wish.
@book{hennessy,
  author    = {Hennessy, John L. and Patterson, David A.},
  title     = {Computer Architecture: A Quantitative Approach},
  edition   = {Third},
  year      = {2002},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  isbn      = {1-55860-596-7},
}
% Intel optimization reference manual.
% The month uses the predefined three-letter macro so that styles can
% abbreviate or localise it.
@book{intel-optimization,
  author    = {{Intel Corporation}},
  title     = {{Intel\textsuperscript{\textregistered} 64 and IA-32 Architectures Optimization Reference Manual}},
  year      = {2009},
  month     = mar,
  number    = {248966-018},
  series    = {1},
  publisher = {Intel Press},
}
% Drepper's report on what programmers should know about memory.
@techreport{memory,
  author      = {Drepper, Ulrich},
  title       = {{What Every Programmer Should Know About Memory}},
  institution = {Red Hat},
  year        = {2007},
  month       = nov,
  url         = {http://people.redhat.com/drepper/cpumemory.pdf}
}
% Gregg's material on Linux performance analysis and tools.
@techreport{gregg,
  author      = {Brendan Gregg},
  title       = {{Linux Performance Analysis and Tools}},
  institution = {Joyent},
  year        = {2013},
  month       = feb,
  url         = {http://es.slideshare.net/brendangregg/linux-performance-analysis-and-tools}
}
% Book on design patterns for parallel programming.
@book{patterns,
  author    = {Mattson, Timothy and Sanders, Beverly and Massingill, Berna},
  title     = {Patterns for parallel programming},
  edition   = {First},
  publisher = {Addison-Wesley Professional},
  year      = {2004},
  isbn      = {0321228111}
}
% Chapter on automatic performance analysis of distributed applications,
% published in the collection "Performance Analysis and Grid Computing".
% For this entry type, title is the chapter's own title and booktitle is the
% collection; the chapter field is reserved for a chapter number, so the
% chapter title has been moved out of it.
@incollection{automatic,
  author    = {Margalef, Tom{\`a}s and Jorba, Josep and Morajko, Oleg and Morajko, Anna and Luque, Emilio},
  title     = {Different Approaches to Automatic Performance Analysis of Distributed Applications},
  booktitle = {Performance Analysis and Grid Computing},
  editor    = {Getov, Vladimir and Gerndt, Michael and Hoisie, Adolfy and Malony, Allen and Miller, Barton},
  year      = {2004},
  pages     = {3--19},
  numpages  = {17},
  publisher = {Kluwer Academic Publishers},
  address   = {Norwell, MA, USA},
  isbn      = {1-4020-7693-2},
  keywords  = {automatic performance analysis, distributed computing, dynamic tuning},
}
% SC '08 paper on capturing performance knowledge for automated analysis.
@inproceedings{capturing,
  author    = {Huck, Kevin A. and Hernandez, Oscar and Bui, Van and Chandrasekaran, Sunita and Chapman, Barbara and Malony, Allen D. and McInnes, Lois Curfman and Norris, Boyana},
  title     = {Capturing performance knowledge for automated analysis},
  booktitle = {Proceedings of the 2008 ACM/IEEE conference on Supercomputing},
  series    = {SC '08},
  pages     = {49:1--49:10},
  articleno = {49},
  numpages  = {10},
  year      = {2008},
  publisher = {IEEE Press},
  address   = {Piscataway, NJ, USA},
  location  = {Austin, Texas},
  isbn      = {978-1-4244-2835-9},
  url       = {http://dl.acm.org/citation.cfm?id=1413370.1413420},
  acmid     = {1413420}
}
% Article on automatic performance analysis of hybrid MPI/OpenMP applications.
% The double issue number is a range with an en dash, and the acronyms in the
% title are protected from sentence-casing.
@article{hybrid,
  author     = {Wolf, Felix and Mohr, Bernd},
  title      = {Automatic Performance Analysis of Hybrid {MPI/OpenMP} Applications},
  journal    = {J. Syst. Archit.},
  issue_date = {November 2003},
  volume     = {49},
  number     = {10--11},
  month      = nov,
  year       = {2003},
  pages      = {421--439},
  numpages   = {19},
  issn       = {1383-7621},
  doi        = {10.1016/S1383-7621(03)00102-4},
  url        = {http://dx.doi.org/10.1016/S1383-7621(03)00102-4},
  acmid      = {967618},
  publisher  = {Elsevier North-Holland, Inc.},
  address    = {New York, NY, USA},
  keywords   = {event tracing, parallel computing, performance analysis, user interface},
}
% SFM'07 paper introducing software performance engineering.
% Fixes the misspelled keyword "antipatterns".
@inproceedings{intro,
  author    = {Smith, Connie U.},
  title     = {Introduction to software performance engineering: origins and outstanding problems},
  booktitle = {Proceedings of the 7th international conference on Formal methods for performance evaluation},
  series    = {SFM'07},
  year      = {2007},
  pages     = {395--428},
  numpages  = {34},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  location  = {Bertinoro, Italy},
  isbn      = {978-3-540-72482-7},
  url       = {http://dl.acm.org/citation.cfm?id=1768017.1768027},
  acmid     = {1768027},
  keywords  = {SPE, SPE process, performance antipatterns, performance models, performance patterns, performance prediction, queueing network models, software performance engineering},
}
% FOSE '07 paper on the future of software performance engineering.
@inproceedings{future,
  author    = {Woodside, Murray and Franks, Greg and Petriu, Dorina C.},
  title     = {The Future of Software Performance Engineering},
  booktitle = {2007 Future of Software Engineering},
  series    = {FOSE '07},
  pages     = {171--187},
  numpages  = {17},
  year      = {2007},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {0-7695-2829-5},
  doi       = {10.1109/FOSE.2007.32},
  url       = {http://dx.doi.org/10.1109/FOSE.2007.32},
  acmid     = {1254717}
}
% ICSE '76 paper giving a critical overview of computer performance evaluation.
@inproceedings{overview,
  author    = {Browne, J. C.},
  title     = {A critical overview of computer performance evaluation},
  booktitle = {Proceedings of the 2nd international conference on Software engineering},
  series    = {ICSE '76},
  pages     = {138--145},
  numpages  = {8},
  year      = {1976},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  location  = {San Francisco, California, USA},
  url       = {http://dl.acm.org/citation.cfm?id=800253.807665},
  acmid     = {807665},
  keywords  = {Data base systems, Measurement, Modeling, Networks, Performance evaluation, Scheduling, Work and capacity}
}
% HPC Toolkit web resource from Rice University.
% Rewritten from a LaTeX bibitem into a real misc entry: BibTeX ignores text
% outside entries, so the key could not be cited before. The address is
% typeset in typewriter type, as in the earlier hand-written item.
@misc{hpctoolkit,
  author       = {{Rice University}},
  title        = {{HPC} Toolkit},
  howpublished = {\texttt{http://hpctoolkit.org}},
}
% Performance Application Programming Interface web resource from the
% University of Tennessee.
% Rewritten from a LaTeX bibitem into a real misc entry: BibTeX ignores text
% outside entries, so the key could not be cited before. The address is
% typeset in typewriter type, as in the earlier hand-written item.
@misc{papi,
  author       = {{University of Tennessee}},
  title        = {Performance Application Programming Interface},
  howpublished = {\texttt{http://icl.cs.utk.edu/papi}},
}
% Amdahl's 1967 AFIPS conference paper on the single processor approach.
@inproceedings{amdahl,
  author    = {Amdahl, Gene M.},
  title     = {Validity of the single processor approach to achieving large scale computing capabilities},
  booktitle = {Proceedings of the April 18-20, 1967, spring joint computer conference},
  series    = {AFIPS '67 (Spring)},
  pages     = {483--485},
  numpages  = {3},
  year      = {1967},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Atlantic City, New Jersey},
  doi       = {10.1145/1465482.1465560},
  url       = {http://doi.acm.org/10.1145/1465482.1465560},
  acmid     = {1465560}
}
% Proceedings volume on fractal aspects of materials, edited by Mandelbrot
% and Passoja.
% The acknowledgement value is braced as a literal: left bare it is read as a
% string macro, and no such macro is defined in the visible part of this file.
@proceedings{mandel,
  editor          = {Beno{\^\i}t B. Mandelbrot and Dann E. Passoja},
  title           = {{Fractal aspects of materials: metal and catalyst
                     surfaces, powders and aggregates: extended abstracts}},
  booktitle       = {{Fractal aspects of materials: metal and catalyst
                     surfaces, powders and aggregates: extended abstracts}},
  series          = {Materials Research Society extended abstracts},
  volume          = {EA--4},
  publisher       = {Materials Research Society},
  address         = {Pittsburgh, PA, USA},
  pages           = {v + 47},
  year            = {1984},
  LCCN            = {QA447 .F72 1984},
  bibdate         = {Mon Sep 10 14:59:48 MDT 2012},
  bibsource       = {clas.caltech.edu:210/INNOPAC;
                     http://www.math.utah.edu/pub/bibnet/authors/m/mandelbrot-benoit.bib},
  acknowledgement = {ack-nhfb},
  remark          = {Proceedings of Symposium P, 1984 Fall Meeting of the
                     Materials Research Society, November 26--27, 1984,
                     Boston Marriott Hotel at Copley Place, Boston,
                     Massachusetts.},
  subject         = {Geometry; Congresses; Fractals; Surfaces
                     (Technology)},
}
% Bailey's NASA Ames technical report on misleading performance reporting.
% Rewritten from a LaTeX bibitem into a real techreport entry: BibTeX ignores
% text outside entries, so the key could not be cited before.
@techreport{twelve-ways,
  author      = {Bailey, David H.},
  title       = {Twelve Ways to Fool the Masses When Giving Performance Results on Parallel Computers},
  type        = {{RNR} Technical Report},
  number      = {RNR-90-020},
  institution = {{NASA} Ames Research Center},
  year        = {1991},
}
% modern version of that
% Gustafson's note re-evaluating Amdahl's law.
% Adds the issue number and month, and protects only the proper noun in the
% title instead of double-bracing the whole title.
@article{gustafson,
  author  = {Gustafson, John L.},
  title   = {Reevaluating {Amdahl's} Law},
  journal = {Communications of the ACM},
  volume  = {31},
  number  = {5},
  month   = may,
  year    = {1988},
  pages   = {532--533}
}
% Karp and Flatt's article on measuring parallel processor performance.
@article{karp-flatt,
  author    = {Karp, Alan H. and Flatt, Horace P.},
  title     = {Measuring parallel processor performance},
  journal   = {Commun. ACM},
  year      = {1990},
  month     = may,
  volume    = {33},
  number    = {5},
  pages     = {539--543},
  numpages  = {5},
  issn      = {0001-0782},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
% Article comparing throughput computing on CPU and GPU.
% The acronyms in the title are brace-protected; without that, sentence-casing
% styles would print them in lower case.
@article{gpu-myth,
  author    = {Lee, Victor W. and Kim, Changkyu and Chhugani, Jatin and Deisher, Michael and Kim, Daehyun and Nguyen, Anthony D. and Satish, Nadathur and Smelyanskiy, Mikhail and Chennupaty, Srinivas and Hammarlund, Per and Singhal, Ronak and Dubey, Pradeep},
  title     = {Debunking the {100X} {GPU} vs. {CPU} Myth: An Evaluation of Throughput Computing on {CPU} and {GPU}},
  journal   = {SIGARCH Comput. Archit. News},
  volume    = {38},
  number    = {3},
  month     = jun,
  year      = {2010},
  pages     = {451--460},
  numpages  = {10},
  issn      = {0163-5964},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% McCalpin's newsletter article on memory bandwidth and machine balance.
% The month uses the predefined macro instead of a literal string.
@article{stream,
  author  = {McCalpin, J. D.},
  title   = {Memory Bandwidth and Machine Balance in Current High Performance Computers},
  journal = {IEEE Technical Committee on Computer Architecture (TCCA) Newsletter},
  month   = dec,
  year    = {1995}
}
% Conference paper on scalable analysis of performance counter metrics.
% The booktitle drops the hard-coded "In" prefix, which styles add themselves,
% closes the unbalanced parenthesis, and names the conference the same way
% as the other Supercomputing entries in this file.
@inproceedings{counters,
  author    = {Dong H. Ahn and Jeffrey S. Vetter},
  title     = {Scalable analysis techniques for microprocessor performance counter metrics},
  booktitle = {Proceedings of the 2002 ACM/IEEE Conference on Supercomputing (SC2002)},
  year      = {2002},
  pages     = {1--16},
  publisher = {IEEE Computer Society Press}
}
% The LINPACK user's guide, published by SIAM.
@book{linpack,
  author    = {Dongarra, J. J. and Moler, C. B. and Bunch, J. R. and Stewart, G. W.},
  title     = {{LINPACK} User's Guide},
  year      = {1979},
  publisher = {SIAM}
}
% HPL, a portable implementation of the High-Performance Linpack benchmark.
% The web address is stored in url; it was previously in address, which is
% meant for the publisher's or institution's location.
@techreport{hpl,
  author      = {Petitet, A. and Whaley, R. C. and Dongarra, Jack and Cleary, A.},
  title       = {{HPL} - A Portable Implementation of the {High-Performance} {Linpack} Benchmark for {Distributed-Memory} Computers},
  institution = {University of Tennessee},
  year        = {2008},
  url         = {http://www.netlib.org/benchmark/hpl/},
}
% Introduction to the HPC Challenge benchmark suite.
% Replaces the placeholder institution "." with a real one, fixes the
% capitalisation of McCalpin, and protects only the acronym in the title.
% NOTE(review): institution supplied from memory of the published report;
% confirm before relying on it.
@techreport{hpcc,
  author      = {Piotr Luszczek and Jack J. Dongarra and David Koester and Rolf Rabenseifner and Bob Lucas and Jeremy Kepner and John McCalpin and David Bailey and Daisuke Takahashi},
  title       = {Introduction to the {HPC} Challenge Benchmark Suite},
  institution = {University of Tennessee},
  year        = {2005},
}
% Article on optimizing latency in Beowulf clusters.
% The link is stored in the standard url field; it was previously in the
% non-standard ee field, which bibliography styles ignore. Only the proper
% noun is protected in the title.
@article{latency,
  author  = {Rafael Garabato and
             Victor Rosales and
             Andres More},
  title   = {Optimizing Latency in {Beowulf} Clusters},
  journal = {CLEI Electron. J.},
  volume  = {15},
  number  = {3},
  year    = {2012},
  url     = {http://www.clei.cl/cleiej/paper.php?id=248},
}
% Fleming and Wallace's article on summarizing benchmark results correctly.
@article{how-not-to-lie,
  author    = {Fleming, Philip J. and Wallace, John J.},
  title     = {How not to lie with statistics: the correct way to summarize benchmark results},
  journal   = {Commun. ACM},
  year      = {1986},
  month     = mar,
  volume    = {29},
  number    = {3},
  pages     = {218--221},
  numpages  = {4},
  issn      = {0001-0782},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
% Atkinson's numerical analysis textbook, second edition.
% Edition is an ordinal word, as classic BibTeX styles expect; a bare "2"
% is printed as "2 edition".
@book{numerical-analysis,
  author    = {Atkinson, Kendall},
  title     = {An Introduction to Numerical Analysis},
  edition   = {Second},
  publisher = {Wiley},
  year      = {1989},
  isbn      = {0471624896},
  url       = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20\&path=ASIN/0471624896},
}
% Gardner's Scientific American column on Conway's game of Life.
% Protects only the proper nouns instead of double-bracing the whole title,
% uses TeX quotation marks, and adds the volume and issue number.
@article{conway,
  author    = {Gardner, Martin},
  title     = {Mathematical Games: The Fantastic Combinations of {John Conway's} New Solitaire Game ``{Life}''},
  journal   = {Scientific American},
  volume    = {223},
  number    = {4},
  month     = oct,
  year      = {1970},
  pages     = {120--123},
  posted-at = {2008-12-03 13:54:53}
}
% Book on graphical methods for data analysis.
% Authors are separated with "and": a comma-separated list is read as one
% malformed name and triggers BibTeX's "too many commas" error.
@book{kiviat,
  author    = {Chambers, J. M. and Cleveland, W. S. and Kleiner, B. and Tukey, P. A.},
  title     = {Graphical Methods for Data Analysis},
  publisher = {Wadsworth},
  address   = {Belmont, CA},
  year      = {1983}
}
% Hunter's article introducing Matplotlib.
% Field names are lower-case like the rest of the file, "2D" is protected from
% sentence-casing, the journal name uses a single consistent spelling, and the
% DOI is added.
@article{matplotlib,
  author    = {Hunter, J. D.},
  title     = {Matplotlib: A {2D} Graphics Environment},
  journal   = {Computing in Science \& Engineering},
  volume    = {9},
  number    = {3},
  pages     = {90--95},
  year      = {2007},
  publisher = {IEEE COMPUTER SOC},
  doi       = {10.1109/MCSE.2007.55},
  abstract  = {Matplotlib is a 2D graphics package used for Python
               for application development, interactive scripting, and
               publication-quality image generation across user
               interfaces and operating systems.}
}
% Article describing the NumPy array structure.
% The first author's surname carries the particle "van der", so it is written
% in "von Last, First" form; given-name accents are LaTeX escapes; the journal
% name is spelled out; "NumPy" is case-protected; repeated keywords are removed.
@article{numpy,
  author     = {van der Walt, St{\'e}fan and Colbert, S. Chris and Varoquaux, Ga{\"e}l},
  title      = {The {NumPy} Array: A Structure for Efficient Numerical Computation},
  journal    = {Computing in Science \& Engineering},
  issue_date = {March 2011},
  volume     = {13},
  number     = {2},
  month      = mar,
  year       = {2011},
  pages      = {22--30},
  numpages   = {9},
  issn       = {1521-9615},
  doi        = {10.1109/MCSE.2011.37},
  url        = {http://dx.doi.org/10.1109/MCSE.2011.37},
  acmid      = {1957466},
  publisher  = {IEEE Educational Activities Department},
  address    = {Piscataway, NJ, USA},
  keywords   = {NumPy, Python, scientific programming, numerical computations, programming libraries},
}
% Intel Math Kernel Library reference manual.
% The corporate author is double-braced so it is not split into first and
% last names, and the non-standard city and country fields are merged into
% the standard address field.
% NOTE(review): the isbn value looks like a document number rather than an
% ISBN; confirm.
@book{mklbook,
  author    = {{Intel Corporation}},
  title     = {{Intel Math Kernel Library}. Reference Manual},
  publisher = {Intel Corporation},
  address   = {Santa Clara, USA},
  year      = {2009},
  isbn      = {630813-054US},
}
% Schaum's outline on business statistics.
% Removes library-catalogue punctuation: the trailing slash in the title, the
% trailing comma in the publisher, the colon in the address, and the "c" and
% full stop around the year.
@book{stddevbook,
  author    = {Kazmier, Leonard J.},
  title     = {{Schaum's} Outline of Theory and Problems of Business Statistics},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = {1976},
  note      = {Includes index.}
}