% siamese.bib — BibTeX bibliography on siamese networks / CNN image retrieval.
% NOTE(cleanup): scraped web-interface chrome and a line-number gutter dump
% (artifacts of copy-pasting from a code-hosting UI) were removed from the top
% of this file; everything outside @entry{...} blocks is ignored by BibTeX.
@inproceedings{radenovic_cnn_2016,
title = {{CNN} {Image} {Retrieval} {Learns} from {BoW}: {Unsupervised} {Fine}-{Tuning} with {Hard} {Examples}},
shorttitle = {{CNN} {Image} {Retrieval} {Learns} from {BoW}},
url = {https://link.springer.com/chapter/10.1007/978-3-319-46448-0_1},
doi = {10.1007/978-3-319-46448-0_1},
abstract = {Convolutional Neural Networks (CNNs) achieve state-of-the-art performance in many computer vision tasks. However, this achievement is preceded by extreme manual annotation in order to perform either training from scratch or fine-tuning for the target task. In this work, we propose to fine-tune CNN for image retrieval from a large collection of unordered images in a fully automated manner. We employ state-of-the-art retrieval and Structure-from-Motion (SfM) methods to obtain 3D models, which are used to guide the selection of the training data for CNN fine-tuning. We show that both hard positive and hard negative examples enhance the final performance in particular object retrieval with compact codes.},
language = {en},
urldate = {2017-03-10},
booktitle = {Computer {Vision} {\textendash} {ECCV} 2016},
publisher = {Springer},
address = {Cham},
author = {Radenovi{\'c}, Filip and Tolias, Giorgos and Chum, Ond{\v r}ej},
month = oct,
year = {2016},
pages = {3--20},
file = {1604.02426.pdf:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/ZHS4MWC9/1604.02426.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/6N4J8SNI/978-3-319-46448-0_1.html:text/html}
}
@inproceedings{gordo_deep_2016,
title = {Deep {Image} {Retrieval}: {Learning} {Global} {Representations} for {Image} {Search}},
shorttitle = {Deep {Image} {Retrieval}},
url = {https://link.springer.com/chapter/10.1007/978-3-319-46466-4_15},
doi = {10.1007/978-3-319-46466-4_15},
abstract = {We propose a novel approach for instance-level image retrieval. It produces a global and compact fixed-length representation for each image by aggregating many region-wise descriptors. In contrast to previous works employing pre-trained deep networks as a black box to produce features, our method leverages a deep architecture trained for the specific task of image retrieval. Our contribution is twofold: (i) we leverage a ranking framework to learn convolution and projection weights that are used to build the region features; and (ii) we employ a region proposal network to learn which regions should be pooled to form the final global descriptor. We show that using clean training data is key to the success of our approach. To that aim, we use a large scale but noisy landmark dataset and develop an automatic cleaning approach. The proposed architecture produces a global image representation in a single forward pass. Our approach significantly outperforms previous approaches based on global descriptors on standard datasets. It even surpasses most prior works based on costly local descriptor indexing and spatial verification. Additional material is available at www.xrce.xerox.com/Deep-Image-Retrieval.},
language = {en},
urldate = {2017-03-10},
booktitle = {Computer {Vision} {\textendash} {ECCV} 2016},
publisher = {Springer},
address = {Cham},
author = {Gordo, Albert and Almaz{\'a}n, Jon and Revaud, Jerome and Larlus, Diane},
month = oct,
year = {2016},
pages = {241--257},
file = {1604.01325.pdf:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/XM3S297Z/1604.01325.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/PNN96KZJ/978-3-319-46466-4_15.html:text/html}
}
@inproceedings{sun_deep_2014,
title = {Deep learning face representation by joint identification-verification},
url = {https://arxiv.org/abs/1406.4773},
urldate = {2017-03-10},
booktitle = {Advances in neural information processing systems},
author = {Sun, Yi and Chen, Yuheng and Wang, Xiaogang and Tang, Xiaoou},
year = {2014},
pages = {1988--1996},
internal-note = {url corrected during review: the previous link pointed to an unrelated NIPS paper (analog memories in a balanced rate-based network of E-I neurons)},
file = {[PDF] nips.cc:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/X8I89QBK/Sun et al. - 2014 - Deep learning face representation by joint identif.pdf:application/pdf}
}
@article{weinberger_distance_2006,
title = {Distance metric learning for large margin nearest neighbor classification},
volume = {18},
url = {https://papers.nips.cc/paper/2795-distance-metric-learning-for-large-margin-nearest-neighbor-classification.pdf},
urldate = {2017-03-10},
journal = {Advances in neural information processing systems},
author = {Weinberger, Kilian Q. and Blitzer, John and Saul, Lawrence},
year = {2006},
pages = {1473--1480},
file = {[PDF] nips.cc:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/5SGUSRK8/Weinberger et al. - 2006 - Distance metric learning for large margin nearest .pdf:application/pdf}
}
@inproceedings{schroff_facenet:_2015,
title = {{FaceNet}: {A} unified embedding for face recognition and clustering},
shorttitle = {{FaceNet}},
url = {http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Schroff_FaceNet_A_Unified_2015_CVPR_paper.html},
urldate = {2017-03-10},
booktitle = {Proceedings of the {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition}},
author = {Schroff, Florian and Kalenichenko, Dmitry and Philbin, James},
year = {2015},
pages = {815--823},
file = {[PDF] cv-foundation.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/6WC82GH2/Schroff et al. - 2015 - Facenet A unified embedding for face recognition .pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/HCGSSDDR/Schroff_FaceNet_A_Unified_2015_CVPR_paper.html:text/html}
}
@inproceedings{chopra_learning_2005,
title = {Learning a similarity metric discriminatively, with application to face verification},
volume = {1},
doi = {10.1109/CVPR.2005.202},
abstract = {We present a method for training a similarity metric from data. The method can be used for recognition or verification applications where the number of categories is very large and not known during training, and where the number of training samples for a single category is very small. The idea is to learn a function that maps input patterns into a target space such that the L1 norm in the target space approximates the "semantic" distance in the input space. The method is applied to a face verification task. The learning process minimizes a discriminative loss function that drives the similarity metric to be small for pairs of faces from the same person, and large for pairs from different persons. The mapping from raw to the target space is a convolutional network whose architecture is designed for robustness to geometric distortions. The system is tested on the Purdue/AR face database which has a very high degree of variability in the pose, lighting, expression, position, and artificial occlusions such as dark glasses and obscuring scarves.},
booktitle = {2005 {IEEE} {Computer} {Society} {Conference} on {Computer} {Vision} and {Pattern} {Recognition} ({CVPR}'05)},
author = {Chopra, S. and Hadsell, R. and LeCun, Y.},
month = jun,
year = {2005},
keywords = {Artificial neural networks, Character generation, discriminative loss function, Drives, face recognition, face verification, geometric distortion, Glass, L1 norm, learning (artificial intelligence), Robustness, semantic distance approximation, similarity metric learning, Spatial databases, Support vector machine classification, Support vector machines, System testing},
pages = {539--546},
file = {cvpr05.pdf:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/2KGM36QV/cvpr05.pdf:application/pdf;IEEE Xplore Abstract Record:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/T2FP3MD6/1467314.html:text/html}
}
@article{tolias_particular_2015,
title = {Particular object retrieval with integral max-pooling of {CNN} activations},
url = {http://arxiv.org/abs/1511.05879},
abstract = {Recently, image representation built upon Convolutional Neural Network (CNN) has been shown to provide effective descriptors for image search, outperforming pre-CNN features as short-vector representations. Yet such models are not compatible with geometry-aware re-ranking methods and still outperformed, on some particular object retrieval benchmarks, by traditional image search systems relying on precise descriptor matching, geometric re-ranking, or query expansion. This work revisits both retrieval stages, namely initial search and re-ranking, by employing the same primitive information derived from the CNN. We build compact feature vectors that encode several image regions without the need to feed multiple inputs to the network. Furthermore, we extend integral images to handle max-pooling on convolutional layer activations, allowing us to efficiently localize matching objects. The resulting bounding box is finally used for image re-ranking. As a result, this paper significantly improves existing CNN-based recognition pipeline: We report for the first time results competing with traditional methods on the challenging Oxford5k and Paris6k datasets.},
urldate = {2017-03-10},
journal = {arXiv:1511.05879 [cs]},
author = {Tolias, Giorgos and Sicre, Ronan and J{\'e}gou, Herv{\'e}},
month = nov,
year = {2015},
note = {arXiv: 1511.05879},
eprint = {1511.05879},
archiveprefix = {arXiv},
primaryclass = {cs.CV},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv\:1511.05879 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/I24943MU/Tolias et al. - 2015 - Particular object retrieval with integral max-pool.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/QZKSFCDC/1511.html:text/html}
}
@article{mishkin_systematic_2016,
title = {Systematic evaluation of {CNN} advances on the {ImageNet}},
url = {http://arxiv.org/abs/1606.02228},
abstract = {The paper systematically studies the impact of a range of recent advances in CNN architectures and learning methods on the object categorization (ILSVRC) problem. The evalution tests the influence of the following choices of the architecture: non-linearity (ReLU, ELU, maxout, compatibility with batch normalization), pooling variants (stochastic, max, average, mixed), network width, classifier design (convolutional, fully-connected, SPP), image pre-processing, and of learning parameters: learning rate, batch size, cleanliness of the data, etc. The performance gains of the proposed modifications are first tested individually and then in combination. The sum of individual gains is bigger than the observed improvement when all modifications are introduced, but the "deficit" is small suggesting independence of their benefits. We show that the use of 128x128 pixel images is sufficient to make qualitative conclusions about optimal network structure that hold for the full size Caffe and VGG nets. The results are obtained an order of magnitude faster than with the standard 224 pixel images.},
urldate = {2017-03-10},
journal = {arXiv:1606.02228 [cs]},
author = {Mishkin, Dmytro and Sergievskiy, Nikolay and Matas, Jiri},
month = jun,
year = {2016},
note = {arXiv: 1606.02228},
eprint = {1606.02228},
archiveprefix = {arXiv},
primaryclass = {cs.CV},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Learning, Computer Science - Neural and Evolutionary Computing},
file = {arXiv\:1606.02228 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/QNHW29C9/Mishkin et al. - 2016 - Systematic evaluation of CNN advances on the Image.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/4DJFXCZ7/1606.html:text/html}
}
@article{iandola_squeezenet:_2016,
title = {{SqueezeNet}: {AlexNet}-level accuracy with 50x fewer parameters and {\textless}0.5MB model size},
shorttitle = {{SqueezeNet}},
url = {http://arxiv.org/abs/1602.07360},
abstract = {Recent research on deep neural networks has focused primarily on improving accuracy. For a given accuracy level, it is typically possible to identify multiple DNN architectures that achieve that accuracy level. With equivalent accuracy, smaller DNN architectures offer at least three advantages: (1) Smaller DNNs require less communication across servers during distributed training. (2) Smaller DNNs require less bandwidth to export a new model from the cloud to an autonomous car. (3) Smaller DNNs are more feasible to deploy on FPGAs and other hardware with limited memory. To provide all of these advantages, we propose a small DNN architecture called SqueezeNet. SqueezeNet achieves AlexNet-level accuracy on ImageNet with 50x fewer parameters. Additionally, with model compression techniques we are able to compress SqueezeNet to less than 0.5MB (510x smaller than AlexNet). The SqueezeNet architecture is available for download here: https://github.com/DeepScale/SqueezeNet},
urldate = {2017-03-10},
journal = {arXiv:1602.07360 [cs]},
author = {Iandola, Forrest N. and Han, Song and Moskewicz, Matthew W. and Ashraf, Khalid and Dally, William J. and Keutzer, Kurt},
month = feb,
year = {2016},
note = {arXiv: 1602.07360},
eprint = {1602.07360},
archiveprefix = {arXiv},
primaryclass = {cs.CV},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv\:1602.07360 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/M53HCSF8/Iandola et al. - 2016 - SqueezeNet AlexNet-level accuracy with 50x fewer .pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/XDPS9JNM/1602.html:text/html}
}
@incollection{krizhevsky_imagenet_2012,
title = {{ImageNet} {Classification} with {Deep} {Convolutional} {Neural} {Networks}},
url = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf},
urldate = {2017-03-10},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 25},
publisher = {Curran Associates, Inc.},
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
editor = {Pereira, F. and Burges, C. J. C. and Bottou, L. and Weinberger, K. Q.},
year = {2012},
pages = {1097--1105},
file = {NIPS Full Text PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/WCGZEHGG/Krizhevsky et al. - 2012 - ImageNet Classification with Deep Convolutional Ne.pdf:application/pdf;NIPS Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/3TFPHGWQ/4824-imagenet-classification-with-deep-convolutional-neural-networks.html:text/html}
}
@article{simonyan_very_2014,
title = {Very {Deep} {Convolutional} {Networks} for {Large}-{Scale} {Image} {Recognition}},
url = {http://arxiv.org/abs/1409.1556},
abstract = {In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.},
urldate = {2017-03-10},
journal = {arXiv:1409.1556 [cs]},
author = {Simonyan, Karen and Zisserman, Andrew},
month = sep,
year = {2014},
note = {arXiv: 1409.1556},
eprint = {1409.1556},
archiveprefix = {arXiv},
primaryclass = {cs.CV},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv\:1409.1556 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/URH4NTMM/Simonyan and Zisserman - 2014 - Very Deep Convolutional Networks for Large-Scale I.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/3FJTQNN5/1409.html:text/html}
}
@article{he_deep_2015,
title = {Deep {Residual} {Learning} for {Image} {Recognition}},
url = {http://arxiv.org/abs/1512.03385},
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28\% relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
urldate = {2017-03-10},
journal = {arXiv:1512.03385 [cs]},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
month = dec,
year = {2015},
note = {arXiv: 1512.03385},
eprint = {1512.03385},
archiveprefix = {arXiv},
primaryclass = {cs.CV},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv\:1512.03385 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/75TWFUZ6/He et al. - 2015 - Deep Residual Learning for Image Recognition.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/58QU6D8S/1512.html:text/html}
}
@article{szegedy_inception-v4_2016,
title = {Inception-v4, {Inception}-{ResNet} and the {Impact} of {Residual} {Connections} on {Learning}},
url = {http://arxiv.org/abs/1602.07261},
abstract = {Very deep convolutional networks have been central to the largest advances in image recognition performance in recent years. One example is the Inception architecture that has been shown to achieve very good performance at relatively low computational cost. Recently, the introduction of residual connections in conjunction with a more traditional architecture has yielded state-of-the-art performance in the 2015 ILSVRC challenge; its performance was similar to the latest generation Inception-v3 network. This raises the question of whether there are any benefit in combining the Inception architecture with residual connections. Here we give clear empirical evidence that training with residual connections accelerates the training of Inception networks significantly. There is also some evidence of residual Inception networks outperforming similarly expensive Inception networks without residual connections by a thin margin. We also present several new streamlined architectures for both residual and non-residual Inception networks. These variations improve the single-frame recognition performance on the ILSVRC 2012 classification task significantly. We further demonstrate how proper activation scaling stabilizes the training of very wide residual Inception networks. With an ensemble of three residual and one Inception-v4, we achieve 3.08 percent top-5 error on the test set of the ImageNet classification (CLS) challenge},
urldate = {2017-03-10},
journal = {arXiv:1602.07261 [cs]},
author = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke, Vincent and Alemi, Alex},
month = feb,
year = {2016},
note = {arXiv: 1602.07261},
eprint = {1602.07261},
archiveprefix = {arXiv},
primaryclass = {cs.CV},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv\:1602.07261 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/9F5UTUNV/Szegedy et al. - 2016 - Inception-v4, Inception-ResNet and the Impact of R.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/69BMMSHX/1602.html:text/html}
}
@article{mikulik_learning_2013,
  author   = {Mikulik, Andrej and Perdoch, Michal and Chum, Ond{\v r}ej and Matas, Ji{\v r}{\'i}},
  title    = {Learning {Vocabularies} over a {Fine} {Quantization}},
  journal  = {International Journal of Computer Vision},
  volume   = {103},
  number   = {1},
  pages    = {163--175},
  month    = may,
  year     = {2013},
  issn     = {0920-5691, 1573-1405},
  doi      = {10.1007/s11263-012-0600-1},
  url      = {https://link.springer.com/article/10.1007/s11263-012-0600-1},
  urldate  = {2017-03-10},
  language = {en},
  abstract = {A novel similarity measure for bag-of-words type large scale image retrieval is presented. The similarity function is learned in an unsupervised manner, requires no extra space over the standard bag-of-words method and is more discriminative than both L2-based soft assignment and Hamming embedding. The novel similarity function achieves mean average precision that is superior to any result published in the literature on the standard Oxford 5k, Oxford 105k and Paris datasets/protocols. We study the effect of a fine quantization and very large vocabularies (up to 64 million words) and show that the performance of specific object retrieval increases with the size of the vocabulary. This observation is in contradiction with previously published results. We further demonstrate that the large vocabularies increase the speed of the tf-idf scoring step.},
  file     = {download.pdf:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/IXU2C5RC/download.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/5C9AQAFK/s11263-012-0600-1.html:text/html}
}
@inproceedings{philbin_object_2007,
title = {Object retrieval with large vocabularies and fast spatial matching},
url = {http://ieeexplore.ieee.org/abstract/document/4270197/},
doi = {10.1109/CVPR.2007.383172},
urldate = {2017-03-10},
booktitle = {2007 {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition} ({CVPR}'07)},
publisher = {IEEE},
author = {Philbin, James and Chum, Ondrej and Isard, Michael and Sivic, Josef and Zisserman, Andrew},
year = {2007},
pages = {1--8},
file = {[PDF] ox.ac.uk:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/VN8NX7SX/Philbin et al. - 2007 - Object retrieval with large vocabularies and fast .pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/WNAN9FGM/4270197.html:text/html}
}
@inproceedings{babenko_neural_2014,
title = {Neural codes for image retrieval},
url = {http://link.springer.com/chapter/10.1007/978-3-319-10590-1_38},
doi = {10.1007/978-3-319-10590-1_38},
urldate = {2017-03-28},
booktitle = {Computer {Vision} {\textendash} {ECCV} 2014},
publisher = {Springer},
address = {Cham},
author = {Babenko, Artem and Slesarev, Anton and Chigorin, Alexandr and Lempitsky, Victor},
year = {2014},
pages = {584--599},
file = {[PDF] arxiv.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/46SXQWC4/Babenko et al. - 2014 - Neural codes for image retrieval.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/FVNXNU8E/978-3-319-10590-1_38.html:text/html}
}
@inproceedings{long_fully_2015,
  author    = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  title     = {Fully convolutional networks for semantic segmentation},
  booktitle = {Proceedings of the {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition}},
  year      = {2015},
  pages     = {3431--3440},
  url       = {http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Long_Fully_Convolutional_Networks_2015_CVPR_paper.html},
  urldate   = {2017-05-04},
  file      = {[PDF] cv-foundation.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/MBNSW6KZ/Long et al. - 2015 - Fully convolutional networks for semantic segmenta.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/436DPSHQ/Long_Fully_Convolutional_Networks_2015_CVPR_paper.html:text/html}
}
@article{kaiser_learning_2017,
title = {Learning to remember rare events},
url = {https://arxiv.org/abs/1703.03129},
urldate = {2017-05-04},
journal = {arXiv preprint arXiv:1703.03129},
author = {Kaiser, {\L}ukasz and Nachum, Ofir and Roy, Aurko and Bengio, Samy},
year = {2017},
eprint = {1703.03129},
archiveprefix = {arXiv},
file = {[PDF] arxiv.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/HKHCDMTM/Kaiser et al. - 2017 - Learning to remember rare events.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/NTC297GU/1703.html:text/html}
}
@article{lowe_distinctive_2004,
title = {Distinctive image features from scale-invariant keypoints},
volume = {60},
doi = {10.1023/B:VISI.0000029664.99615.94},
url = {http://www.springerlink.com/index/H4L02691327PX768.pdf},
number = {2},
urldate = {2017-05-12},
journal = {International journal of computer vision},
author = {Lowe, David G.},
year = {2004},
pages = {91--110},
file = {sift-lowe.pdf:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/6W3SSP82/sift-lowe.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/5NKRAVAN/10.1023BVISI.0000029664.99615.html:text/html}
}
@inproceedings{ren_faster_2015,
title = {Faster {R-CNN}: {Towards} real-time object detection with region proposal networks},
shorttitle = {Faster {R-CNN}},
url = {http://papers.nips.cc/paper/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks},
urldate = {2017-05-12},
booktitle = {Advances in neural information processing systems},
author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
year = {2015},
pages = {91--99},
file = {Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/UTNHPQFZ/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks.html:text/html}
}
@article{lecun_gradient-based_1998,
title = {Gradient-based learning applied to document recognition},
volume = {86},
doi = {10.1109/5.726791},
url = {http://ieeexplore.ieee.org/abstract/document/726791/},
number = {11},
urldate = {2017-05-15},
journal = {Proceedings of the IEEE},
author = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
year = {1998},
pages = {2278--2324},
file = {[PDF] dengfanxin.cn:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/GX7R5QB4/LeCun et al. - 1998 - Gradient-based learning applied to document recogn.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/IFBU4ZFU/726791.html:text/html}
}
@book{shawe-taylor_kernel_2004,
title = {Kernel methods for pattern analysis},
url = {https://books.google.fr/books?id=9i0vg12lti4C},
urldate = {2017-05-15},
publisher = {Cambridge University Press},
author = {Shawe-Taylor, John and Cristianini, Nello},
year = {2004},
file = {[PDF] ic.ac.uk:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/W5IFINEB/Shawe-Taylor and Cristianini - 2004 - Kernel methods for pattern analysis.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/MK956H8E/books.html:text/html}
}
@article{scholkopf_comparing_1997,
title = {Comparing support vector machines with {Gaussian} kernels to radial basis function classifiers},
volume = {45},
url = {http://ieeexplore.ieee.org/abstract/document/650102/},
number = {11},
urldate = {2017-05-15},
journal = {IEEE Transactions on Signal Processing},
author = {Sch{\"o}lkopf, Bernhard and Sung, Kah-Kay and Burges, Christopher J. C. and Girosi, Federico and Niyogi, Partha and Poggio, Tomaso and Vapnik, Vladimir},
year = {1997},
pages = {2758--2765},
file = {[PDF] mit.edu:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/UJ2CRQ34/Scholkopf et al. - 1997 - Comparing support vector machines with Gaussian ke.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/7BI6EQS3/650102.html:text/html}
}
@article{vedaldi_efficient_2012,
title = {Efficient additive kernels via explicit feature maps},
volume = {34},
url = {http://ieeexplore.ieee.org/abstract/document/6136519/},
number = {3},
urldate = {2017-05-15},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
author = {Vedaldi, Andrea and Zisserman, Andrew},
year = {2012},
pages = {480--492},
file = {[PDF] ox.ac.uk:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/83C82SH8/Vedaldi and Zisserman - 2012 - Efficient additive kernels via explicit feature ma.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/AER2SRRI/6136519.html:text/html}
}
@article{hinton_fast_2006,
  author  = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee-Whye},
  title   = {A fast learning algorithm for deep belief nets},
  journal = {Neural Computation},
  volume  = {18},
  number  = {7},
  year    = {2006},
  pages   = {1527--1554},
  url     = {http://www.mitpressjournals.org/doi/abs/10.1162/neco.2006.18.7.1527},
  urldate = {2017-05-15},
  file    = {[PDF] mitpressjournals.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/K3BD39PZ/Hinton et al. - 2006 - A fast learning algorithm for deep belief nets.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/QG5CX832/neco.2006.18.7.html:text/html}
}
@inproceedings{freund_desicion-theoretic_1995,
  author    = {Freund, Yoav and Schapire, Robert E.},
  title     = {A desicion-theoretic generalization of on-line learning and an application to boosting},
  booktitle = {European conference on computational learning theory},
  publisher = {Springer},
  year      = {1995},
  pages     = {23--37},
  url       = {http://link.springer.com/chapter/10.1007/3-540-59119-2_166},
  urldate   = {2017-05-15},
  file      = {[PDF] bu.edu:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/7JASS5V5/Freund and Schapire - 1995 - A desicion-theoretic generalization of on-line lea.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/AP5VRKXA/3-540-59119-2_166.html:text/html}
}
@inproceedings{philbin_lost_2008,
  author     = {Philbin, James and Chum, Ondrej and Isard, Michael and Sivic, Josef and Zisserman, Andrew},
  title      = {Lost in quantization: {Improving} particular object retrieval in large scale image databases},
  shorttitle = {Lost in quantization},
  booktitle  = {Computer {Vision} and {Pattern} {Recognition}, 2008. {CVPR} 2008. {IEEE} {Conference} on},
  publisher  = {IEEE},
  year       = {2008},
  pages      = {1--8},
  url        = {http://ieeexplore.ieee.org/abstract/document/4587635/},
  urldate    = {2017-05-16},
  file       = {[PDF] ox.ac.uk:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/F28BFSZS/Philbin et al. - 2008 - Lost in quantization Improving particular object .pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/KD5FQCDG/4587635.html:text/html}
}
@inproceedings{chum_total_2007,
  author     = {Chum, Ondrej and Philbin, James and Sivic, Josef and Isard, Michael and Zisserman, Andrew},
  title      = {Total recall: {Automatic} query expansion with a generative feature model for object retrieval},
  shorttitle = {Total recall},
  booktitle  = {Computer {Vision}, 2007. {ICCV} 2007. {IEEE} 11th {International} {Conference} on},
  publisher  = {IEEE},
  year       = {2007},
  pages      = {1--8},
  url        = {http://ieeexplore.ieee.org/abstract/document/4408891/},
  urldate    = {2017-06-05},
  file       = {[PDF] ox.ac.uk:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/S58BXEEV/Chum et al. - 2007 - Total recall Automatic query expansion with a gen.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/MNUTA786/4408891.html:text/html}
}
@inproceedings{arandjelovic_three_2012,
  author    = {Arandjelovi{\'c}, Relja and Zisserman, Andrew},
  title     = {Three things everyone should know to improve object retrieval},
  booktitle = {Computer {Vision} and {Pattern} {Recognition} ({CVPR}), 2012 {IEEE} {Conference} on},
  publisher = {IEEE},
  year      = {2012},
  pages     = {2911--2918},
  url       = {http://ieeexplore.ieee.org/abstract/document/6248018/},
  urldate   = {2017-06-05},
  file      = {[PDF] sfu.ca:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/JA3DWUZV/Arandjelovi{\'c} and Zisserman - 2012 - Three things everyone should know to improve objec.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/IUVV3C46/6248018.html:text/html}
}
@inproceedings{turcot_better_2009,
  author     = {Turcot, Panu and Lowe, David G.},
  title      = {Better matching with fewer features: {The} selection of useful features in large database recognition problems},
  shorttitle = {Better matching with fewer features},
  booktitle  = {Computer {Vision} {Workshops} ({ICCV} {Workshops}), 2009 {IEEE} 12th {International} {Conference} on},
  publisher  = {IEEE},
  year       = {2009},
  pages      = {2109--2116},
  url        = {http://ieeexplore.ieee.org/abstract/document/5457541/},
  urldate    = {2017-06-05},
  file       = {[PDF] semanticscholar.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/UQVQD94X/Turcot and Lowe - 2009 - Better matching with fewer features The selection.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/HDHX5U54/5457541.html:text/html}
}
@article{jegou_accurate_2010,
  author  = {J{\'e}gou, Herv{\'e} and Schmid, Cordelia and Harzallah, Hedi and Verbeek, Jakob},
  title   = {Accurate image search using the contextual dissimilarity measure},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {32},
  number  = {1},
  year    = {2010},
  pages   = {2--11},
  url     = {http://ieeexplore.ieee.org/abstract/document/4695831/},
  urldate = {2017-06-09},
  file    = {[PDF] from archives-ouvertes.fr:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/3NC3JCPJ/Jegou et al. - 2010 - Accurate image search using the contextual dissimi.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/SRSF4J64/4695831.html:text/html}
}
@article{cinbis_weakly_2017,
  author   = {Cinbis, Ramazan Gokberk and Verbeek, Jakob and Schmid, Cordelia},
  title    = {Weakly {Supervised} {Object} {Localization} with {Multi}-fold {Multiple} {Instance} {Learning}},
  abstract = {Object category localization is a challenging problem in computer vision. Standard supervised training requires bounding box annotations of object instances. This time-consuming annotation process is sidestepped in weakly supervised learning. In this case, the supervised information is restricted to binary labels that indicate the absence/presence of object instances in the image, without their locations. We follow a multiple-instance learning approach that iteratively trains the detector and infers the object locations in the positive training images. Our main contribution is a multi-fold multiple instance learning procedure, which prevents training from prematurely locking onto erroneous object locations. This procedure is particularly important when using high-dimensional representations, such as Fisher vectors and convolutional neural network features. We also propose a window refinement method, which improves the localization accuracy by incorporating an objectness prior. We present a detailed experimental evaluation using the PASCAL VOC 2007 dataset, which verifies the effectiveness of our approach.},
  journal  = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {39},
  number   = {1},
  issn     = {0162-8828, 2160-9292},
  month    = jan,
  year     = {2017},
  pages    = {189--203},
  doi      = {10.1109/TPAMI.2016.2535231},
  url      = {http://arxiv.org/abs/1503.00949},
  urldate  = {2017-06-08},
  note     = {arXiv: 1503.00949},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file     = {arXiv\:1503.00949 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/QNNBZI4X/Cinbis et al. - 2017 - Weakly Supervised Object Localization with Multi-f.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/WWGSXIZZ/1503.html:text/html}
}
@inproceedings{lin_deephash_2017,
  author     = {Lin, Jie and Mor{\`e}re, Olivier and Veillard, Antoine and Duan, Ling-Yu and Goh, Hanlin and Chandrasekhar, Vijay},
  title      = {{DeepHash} for {Image} {Instance} {Retrieval}: {Getting} {Regularization}, {Depth} and {Fine}-{Tuning} {Right}},
  shorttitle = {{DeepHash} for {Image} {Instance} {Retrieval}},
  abstract   = {This work focuses on representing very high-dimensional global image descriptors using very compact 64-1024 bit binary hashes for instance retrieval. We propose DeepHash: a hashing scheme based on deep networks. Key to making DeepHash work at extremely low bitrates are three important considerations -- regularization, depth and fine-tuning -- each requiring solutions specific to the hashing problem. In-depth evaluation shows that our scheme outperforms state-of-the-art methods over several benchmark datasets for both Fisher Vectors and Deep Convolutional Neural Network features, by up to 8.5\% over other schemes. The retrieval performance with 256-bit hashes is close to that of the uncompressed floating point features -- a remarkable 512x compression.},
  booktitle  = {Proceedings of the 2017 {ACM} on {International} {Conference} on {Multimedia} {Retrieval}},
  series     = {{ICMR} '17},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  year       = {2017},
  pages      = {133--141},
  isbn       = {978-1-4503-4701-3},
  doi        = {10.1145/3078971.3078983},
  url        = {http://doi.acm.org/10.1145/3078971.3078983},
  urldate    = {2017-06-08},
  keywords   = {cnn, fisher vectors, hashing, image instance retrieval, rbm, regularization, siamese network},
  file       = {ACM Full Text PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/8ZFZP575/Lin et al. - 2017 - DeepHash for Image Instance Retrieval Getting Reg.pdf:application/pdf}
}
@article{mikolajczyk_scale_2004,
  author  = {Mikolajczyk, Krystian and Schmid, Cordelia},
  title   = {Scale \& affine invariant interest point detectors},
  journal = {International Journal of Computer Vision},
  volume  = {60},
  number  = {1},
  year    = {2004},
  pages   = {63--86},
  url     = {http://www.springerlink.com/index/H37T7833M7037173.pdf},
  urldate = {2017-06-09},
  file    = {[PDF] ox.ac.uk:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/HWIK3WPZ/Mikolajczyk and Schmid - 2004 - Scale & affine invariant interest point detectors.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/D5K9M5P2/10.1023BVISI.0000027790.02288.html:text/html}
}
@article{russakovsky_imagenet_2015,
  author   = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and Berg, Alexander C. and Fei-Fei, Li},
  title    = {{ImageNet} {Large} {Scale} {Visual} {Recognition} {Challenge}},
  journal  = {International Journal of Computer Vision},
  volume   = {115},
  number   = {3},
  issn     = {0920-5691, 1573-1405},
  month    = dec,
  year     = {2015},
  pages    = {211--252},
  doi      = {10.1007/s11263-015-0816-y},
  url      = {http://link.springer.com/10.1007/s11263-015-0816-y},
  urldate  = {2017-06-12},
  language = {en}
}
@inproceedings{rublee_orb:_2011,
  author     = {Rublee, Ethan and Rabaud, Vincent and Konolige, Kurt and Bradski, Gary},
  title      = {{ORB}: {An} efficient alternative to {SIFT} or {SURF}},
  shorttitle = {{ORB}},
  booktitle  = {Computer {Vision} ({ICCV}), 2011 {IEEE} {International} {Conference} on},
  publisher  = {IEEE},
  year       = {2011},
  pages      = {2564--2571},
  url        = {http://ieeexplore.ieee.org/abstract/document/6126544/},
  urldate    = {2017-06-12},
  file       = {[PDF] chubu.ac.jp:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/SQT84CPG/Rublee et al. - 2011 - ORB An efficient alternative to SIFT or SURF.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/7S62S9V7/6126544.html:text/html}
}
@article{sanchez_image_2013,
  author     = {S{\'a}nchez, Jorge and Perronnin, Florent and Mensink, Thomas and Verbeek, Jakob},
  title      = {Image classification with the {Fisher} vector: {Theory} and practice},
  shorttitle = {Image classification with the {Fisher} vector},
  journal    = {International Journal of Computer Vision},
  volume     = {105},
  number     = {3},
  year       = {2013},
  pages      = {222--245},
  url        = {http://link.springer.com/article/10.1007/s11263-013-0636-x},
  urldate    = {2017-06-12},
  file       = {[PDF] inria.fr:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/P5AUMBC3/S{\'a}nchez et al. - 2013 - Image classification with the fisher vector Theor.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/IIRAVXCM/s11263-013-0636-x.html:text/html}
}
@inproceedings{jegou_aggregating_2010,
  author    = {J{\'e}gou, Herv{\'e} and Douze, Matthijs and Schmid, Cordelia and P{\'e}rez, Patrick},
  title     = {Aggregating local descriptors into a compact image representation},
  booktitle = {Computer {Vision} and {Pattern} {Recognition} ({CVPR}), 2010 {IEEE} {Conference} on},
  publisher = {IEEE},
  year      = {2010},
  pages     = {3304--3311},
  url       = {http://ieeexplore.ieee.org/abstract/document/5540039/},
  urldate   = {2017-06-12},
  file      = {[PDF] inria.fr:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/5GMBXZ94/J{\'e}gou et al. - 2010 - Aggregating local descriptors into a compact image.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/ADIZ2JRM/5540039.html:text/html}
}
@inproceedings{jaakkola_exploiting_1999,
  author    = {Jaakkola, Tommi and Haussler, David},
  title     = {Exploiting generative models in discriminative classifiers},
  booktitle = {Advances in neural information processing systems},
  year      = {1999},
  pages     = {487--493},
  url       = {http://papers.nips.cc/paper/1520-exploiting-generative-models-in-discriminative-classifiers.pdf},
  urldate   = {2017-06-12},
  file      = {[PDF] nips.cc:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/NDQCFCKJ/Jaakkola and Haussler - 1999 - Exploiting generative models in discriminative cla.pdf:application/pdf}
}
@inproceedings{perronnin_large-scale_2010,
  author    = {Perronnin, Florent and Liu, Yan and S{\'a}nchez, Jorge and Poirier, Herv{\'e}},
  title     = {Large-scale image retrieval with compressed {Fisher} vectors},
  booktitle = {Computer {Vision} and {Pattern} {Recognition} ({CVPR}), 2010 {IEEE} {Conference} on},
  publisher = {IEEE},
  year      = {2010},
  pages     = {3384--3391},
  url       = {http://ieeexplore.ieee.org/abstract/document/5540009/},
  urldate   = {2017-06-12},
  file      = {[PDF] unifi.it:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/CMDM28ND/Perronnin et al. - 2010 - Large-scale image retrieval with compressed fisher.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/WZPAJEVF/5540009.html:text/html}
}
@inproceedings{portaz_construction_2017,
  author    = {Portaz, Maxime and Poignant, Johann and Budnik, Mateusz and Mulhem, Philippe and Chevallet, Jean-Pierre and Goeuriot, Lorraine},
  title     = {Construction et {\'e}valuation d'un corpus pour la recherche d'instances d'images mus{\'e}ales},
  booktitle = {{COnf{\'e}rence} en {Recherche} d'{Informations} et {Applications} - {CORIA} 2017, 14th {French} {Information} {Retrieval} {Conference}. {Marseille}, {France}, {March} 29-31, 2017. {Proceedings}, {Marseille}, {France}, {March} 29-31, 2017.},
  year      = {2017},
  pages     = {17--34},
  doi       = {10.24348/coria.2017.5},
  url       = {https://doi.org/10.24348/coria.2017.5}
}
@book{nie_conference_2017,
  editor    = {Nie, Jian-Yun and Lamprier, Sylvain},
  title     = {{COnf{\'e}rence} en {Recherche} d'{Informations} et {Applications} - {CORIA} 2017, 14th {French} {Information} {Retrieval} {Conference}. {Marseille}, {France}, {March} 29-31, 2017. {Proceedings}, {Marseille}, {France}, {March} 29-31, 2017},
  publisher = {ARIA},
  year      = {2017},
  isbn      = {978-2-9556434-0-2}
}
@article{rumelhart_learning_1988,
  author  = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title   = {Learning representations by back-propagating errors},
  journal = {Cognitive modeling},
  volume  = {5},
  number  = {3},
  year    = {1988},
  pages   = {1},
  url     = {https://books.google.fr/books?hl=en&lr=&id=FJblV_iOPjIC&oi=fnd&pg=PA213&dq=Learning+representations+by+back-propagating+errors&ots=zZLq3iGXWV&sig=C0k8P9ZshuRs-OMf4T6gMFuS0cU},
  urldate = {2017-06-13},
  file    = {[PDF] toronto.edu:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/UE29HCEC/Rumelhart et al. - 1988 - Learning representations by back-propagating error.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/F6MICEMR/books.html:text/html}
}
@techreport{rumelhart_learning_1985,
  author      = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title       = {Learning internal representations by error propagation},
  institution = {Institute for Cognitive Science, University of California, San Diego},
  number      = {ICS 8506},
  year        = {1985},
  url         = {http://oai.dtic.mil/oai/oai?verb=getRecord&metadataPrefix=html&identifier=ADA164453},
  urldate     = {2017-06-13},
  file        = {[PDF] dtic.mil:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/B4BKH6XH/Rumelhart et al. - 1985 - Learning internal representations by error propaga.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/9KNF39JU/oai.html:text/html}
}
@incollection{bottou_large-scale_2010,
  author    = {Bottou, L{\'e}on},
  title     = {Large-scale machine learning with stochastic gradient descent},
  booktitle = {Proceedings of {COMPSTAT}'2010},
  publisher = {Springer},
  year      = {2010},
  pages     = {177--186},
  url       = {http://link.springer.com/chapter/10.1007/978-3-7908-2604-3_16},
  urldate   = {2017-06-13},
  file      = {[PDF] bottou.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/3Z4CUC6F/Bottou - 2010 - Large-scale machine learning with stochastic gradi.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/6R9A5DP8/978-3-7908-2604-3_16.html:text/html}
}
@incollection{bottou_stochastic_2012,
  author    = {Bottou, L{\'e}on},
  title     = {Stochastic gradient descent tricks},
  booktitle = {Neural networks: {Tricks} of the trade},
  publisher = {Springer},
  year      = {2012},
  pages     = {421--436},
  url       = {http://link.springer.com/10.1007/978-3-642-35289-8_25},
  urldate   = {2017-06-13},
  file      = {[PDF] uci.edu:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/NN5AAT3U/Bottou - 2012 - Stochastic gradient descent tricks.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/VG6K4KBX/10.html:text/html}
}
@inproceedings{sivic_video_2003,
  author     = {Sivic, Josef and Zisserman, Andrew},
  title      = {Video {Google}: {A} text retrieval approach to object matching in videos},
  shorttitle = {Video {Google}},
  booktitle  = {Proceedings of the {IEEE} {International} {Conference} on {Computer} {Vision} ({ICCV})},
  volume     = {2},
  year       = {2003},
  pages      = {1470--1477},
  url        = {http://ieeexplore.ieee.org/abstract/document/1238663/},
  urldate    = {2017-06-13},
  file       = {[PDF] loria.fr:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/VQQNNSDZ/Sivic et al. - 2003 - Video google A text retrieval approach to object .pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/AHC4DWQU/1238663.html:text/html}
}
@inproceedings{csurka_visual_2004,
  author    = {Csurka, Gabriella and Dance, Christopher and Fan, Lixin and Willamowski, Jutta and Bray, C{\'e}dric},
  title     = {Visual categorization with bags of keypoints},
  booktitle = {Workshop on statistical learning in computer vision, {ECCV}},
  volume    = {1},
  address   = {Prague},
  year      = {2004},
  pages     = {1--2},
  url       = {https://people.eecs.berkeley.edu/~efros/courses/AP06/Papers/csurka-eccv-04.pdf},
  urldate   = {2017-06-13},
  file      = {[PDF] berkeley.edu:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/EZUZWSR9/Csurka et al. - 2004 - Visual categorization with bags of keypoints.pdf:application/pdf}
}
@inproceedings{oquab_is_2015,
  author     = {Oquab, Maxime and Bottou, L{\'e}on and Laptev, Ivan and Sivic, Josef},
  title      = {Is object localization for free? -- {Weakly}-supervised learning with convolutional neural networks},
  shorttitle = {Is object localization for free?},
  booktitle  = {Proceedings of the {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition}},
  year       = {2015},
  pages      = {685--694},
  url        = {http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Oquab_Is_Object_Localization_2015_CVPR_paper.html},
  urldate    = {2017-06-13},
  file       = {[PDF] cv-foundation.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/TSHSHCEE/Oquab et al. - 2015 - Is object localization for free-weakly-supervised.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/FGS7QPH2/Oquab_Is_Object_Localization_2015_CVPR_paper.html:text/html}
}
@inproceedings{chandrasekhar_compression_2017,
  author    = {Chandrasekhar, Vijay and Lin, Jie and Liao, Qianli and Mor{\`e}re, Olivier and Veillard, Antoine and Duan, Ling-Yu and Poggio, Tomaso},
  title     = {Compression of {Deep} {Neural} {Networks} for {Image} {Instance} {Retrieval}},
  abstract  = {Image instance retrieval is the problem of retrieving images from a database which contain the same object. Convolutional Neural Network (CNN) based descriptors are becoming the dominant approach for generating global image descriptors for the instance retrieval problem. One major drawback of CNN-based global descriptors is that uncompressed deep neural network models require hundreds of megabytes of storage making them inconvenient to deploy in mobile applications or in custom hardware. In this work, we study the problem of neural network model compression focusing on the image instance retrieval task. We study quantization, coding, pruning and weight sharing techniques for reducing model size for the instance retrieval problem. We provide extensive experimental results on the trade-off between retrieval performance and model size for different types of networks on several data sets providing the most comprehensive study on this topic. We compress models to the order of a few MBs: two orders of magnitude smaller than the uncompressed models while achieving negligible loss in retrieval performance1.},
  booktitle = {2017 {Data} {Compression} {Conference} ({DCC})},
  month     = apr,
  year      = {2017},
  pages     = {300--309},
  doi       = {10.1109/DCC.2017.93},
  keywords  = {Biological neural networks, cnn, coding, Compression, Convolutional codes, data compression, data sets, deep neural network compression, Encoding, Feature extraction, global image descriptors, image coding, image instance retrieval, image retrieval, instance retrieval problem, mobile applications, mobile computing, model size reduction, neural nets, pruning, quantization, Quantization (signal), uncompressed models, weight sharing techniques},
  file      = {IEEE Xplore Abstract Record:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/AUJZV7TQ/7923643.html:text/html}
}
@inproceedings{veit_conditional_2017,
  author    = {Veit, Andreas and Belongie, Serge and Karaletsos, Theofanis},
  title     = {Conditional {Similarity} {Networks}},
  booktitle = {Computer {Vision} and {Pattern} {Recognition} ({CVPR})},
  address   = {Honolulu, HI},
  month     = jul,
  year      = {2017},
  url       = {https://vision.cornell.edu/se3/wp-content/uploads/2017/04/CSN_CVPR-1.pdf}
}
@inproceedings{he_deep_2016,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep {Residual} {Learning} for {Image} {Recognition}},
  booktitle = {The {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition} ({CVPR})},
  month     = jun,
  year      = {2016}
}
@inproceedings{huang_densely_2017,
  author    = {Huang, Gao and Liu, Zhuang and van der Maaten, Laurens and Weinberger, Kilian Q.},
  title     = {Densely connected convolutional networks},
  booktitle = {Proceedings of the {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition}},
  year      = {2017}
}
@article{gordo_end--end_2017,
  author   = {Gordo, Albert and Almaz{\'a}n, Jon and Revaud, Jerome and Larlus, Diane},
  title    = {End-to-{End} {Learning} of {Deep} {Visual} {Representations} for {Image} {Retrieval}},
  abstract = {While deep learning has become a key ingredient in the top performing methods for many computer vision tasks, it has failed so far to bring similar improvements to instance-level image retrieval. In this article, we argue that reasons for the underwhelming results of deep methods on image retrieval are threefold: (1) noisy training data, (2) inappropriate deep architecture, and (3) suboptimal training procedure. We address all three issues. First, we leverage a large-scale but noisy landmark dataset and develop an automatic cleaning method that produces a suitable training set for deep retrieval. Second, we build on the recent R-MAC descriptor, show that it can be interpreted as a deep and differentiable architecture, and present improvements to enhance it. Last, we train this network with a siamese architecture that combines three streams with a triplet loss. At the end of the training process, the proposed architecture produces a global image representation in a single forward pass that is well suited for image retrieval. Extensive experiments show that our approach significantly outperforms previous retrieval approaches, including state-of-the-art methods based on costly local descriptor indexing and spatial verification. On Oxford 5k, Paris 6k and Holidays, we respectively report 94.7, 96.6, and 94.8 mean average precision. Our representations can also be heavily compressed using product quantization with little loss in accuracy.},
  journal  = {International Journal of Computer Vision},
  issn     = {0920-5691, 1573-1405},
  month    = jun,
  year     = {2017},
  pages    = {1--18},
  doi      = {10.1007/s11263-017-1016-8},
  url      = {https://link.springer.com/article/10.1007/s11263-017-1016-8},
  urldate  = {2017-06-13},
  language = {en},
  file     = {[PDF] arxiv.org:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/MVHWDJNW/Gordo et al. - 2016 - End-to-end learning of deep visual representations.pdf:application/pdf;Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/P3RHVHWK/10.html:text/html}
}
@inproceedings{salvador_faster_2016,
  author    = {Salvador, Amaia and Giro-i-Nieto, Xavier and Marques, Ferran and Satoh, Shin'ichi},
  title     = {Faster {R}-{CNN} {Features} for {Instance} {Search}},
  booktitle = {The {IEEE} {Conference} on {Computer} {Vision} and {Pattern} {Recognition} ({CVPR}) {Workshops}},
  month     = jun,
  year      = {2016},
  file      = {arXiv\:1604.08893 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/HE9C6MN5/Salvador et al. - 2016 - Faster R-CNN Features for Instance Search.pdf:application/pdf}
}
@inproceedings{jegou_hamming_2008,
  author    = {Jegou, Herve and Douze, Matthijs and Schmid, Cordelia},
  title     = {Hamming {Embedding} and {Weak} {Geometric} {Consistency} for {Large} {Scale} {Image} {Search}},
  abstract  = {This paper improves recent methods for large scale image search. State-of-the-art methods build on the bag-of-features image representation. We, first, analyze bag-of-features in the framework of approximate nearest neighbor search. This shows the sub-optimality of such a representation for matching descriptors and leads us to derive a more precise representation based on 1) Hamming embedding (HE) and 2) weak geometric consistency constraints (WGC). HE provides binary signatures that refine the matching based on visual words. WGC filters matching descriptors that are not consistent in terms of angle and scale. HE and WGC are integrated within the inverted file and are efficiently exploited for all images, even in the case of very large datasets. Experiments performed on a dataset of one million of images show a significant improvement due to the binary signature and the weak geometric consistency constraints, as well as their efficiency. Estimation of the full geometric transformation, i.e., a re-ranking step on a short list of images, is complementary to our weak geometric consistency constraints and allows to further improve the accuracy.},
  booktitle = {Proceedings of the 10th {European} {Conference} on {Computer} {Vision}: {Part} {I}},
  series    = {{ECCV} '08},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  pages     = {304--317},
  isbn      = {978-3-540-88681-5},
  doi       = {10.1007/978-3-540-88682-2_24},
  url       = {http://dx.doi.org/10.1007/978-3-540-88682-2_24},
  urldate   = {2017-06-13},
  file      = {[PDF] inria.fr:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/NDT3689C/J{\'e}gou et al. - 2008 - Hamming embedding and weak geometry consistency fo.pdf:application/pdf}
}
@incollection{yosinski_how_2014,
  author    = {Yosinski, Jason and Clune, Jeff and Bengio, Yoshua and Lipson, Hod},
  editor    = {Ghahramani, Z. and Welling, M. and Cortes, C. and Lawrence, N. D. and Weinberger, K. Q.},
  title     = {How transferable are features in deep neural networks?},
  booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 27},
  publisher = {Curran Associates, Inc.},
  year      = {2014},
  pages     = {3320--3328},
  url       = {http://papers.nips.cc/paper/5347-how-transferable-are-features-in-deep-neural-networks.pdf},
  file      = {arXiv\:1411.1792 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/X3VE3D4F/Yosinski et al. - 2014 - How transferable are features in deep neural netwo.pdf:application/pdf}
}
@article{simonyan_very_2014,
  author        = {Simonyan, Karen and Zisserman, Andrew},
  title         = {Very {Deep} {Convolutional} {Networks} for {Large}-{Scale} {Image} {Recognition}},
  journal       = {CoRR},
  volume        = {abs/1409.1556},
  year          = {2014},
  eprint        = {1409.1556},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1409.1556},
  file          = {arXiv\:1409.1556 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/URH4NTMM/Simonyan and Zisserman - 2014 - Very Deep Convolutional Networks for Large-Scale I.pdf:application/pdf}
}
@inproceedings{ioffe_batch_2015,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  editor    = {Bach, Francis and Blei, David},
  title     = {Batch {Normalization}: {Accelerating} {Deep} {Network} {Training} by {Reducing} {Internal} {Covariate} {Shift}},
  abstract  = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer{\textquoteright}s inputs changes during training, as the parameters of the previous layers change. This slows down the training by requiring lower learning rates and careful parameter initialization, and makes it notoriously hard to train models with saturating nonlinearities. We refer to this phenomenon as internal covariate shift, and address the problem by normalizing layer inputs. Our method draws its strength from making normalization a part of the model architecture and performing the normalization for each training mini-batch. Batch Normalization allows us to use much higher learning rates and be less careful about initialization, and in some cases eliminates the need for Dropout. Applied to a stateof-the-art image classification model, Batch Normalization achieves the same accuracy with 14 times fewer training steps, and beats the original model by a significant margin. Using an ensemble of batch-normalized networks, we improve upon the best published result on ImageNet classification: reaching 4.82\% top-5 test error, exceeding the accuracy of human raters.},
  booktitle = {Proceedings of the 32nd {International} {Conference} on {Machine} {Learning}},
  series    = {Proceedings of {Machine} {Learning} {Research}},
  volume    = {37},
  publisher = {PMLR},
  address   = {Lille, France},
  month     = jul,
  year      = {2015},
  pages     = {448--456},
  url       = {http://proceedings.mlr.press/v37/ioffe15.html},
  file      = {arXiv\:1502.03167 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/FSQ8XM9G/Ioffe and Szegedy - 2015 - Batch Normalization Accelerating Deep Network Tra.pdf:application/pdf}
}
@article{hinton_improving_2012,
  author        = {Hinton, Geoffrey E. and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R.},
  title         = {Improving neural networks by preventing co-adaptation of feature detectors},
  abstract      = {When a large feedforward neural network is trained on a small training set, it typically performs poorly on held-out test data. This "overfitting" is greatly reduced by randomly omitting half of the feature detectors on each training case. This prevents complex co-adaptations in which a feature detector is only helpful in the context of several other specific feature detectors. Instead, each neuron learns to detect a feature that is generally helpful for producing the correct answer given the combinatorially large variety of internal contexts in which it must operate. Random "dropout" gives big improvements on many benchmark tasks and sets new records for speech and object recognition.},
  journal       = {arXiv:1207.0580 [cs]},
  eprint        = {1207.0580},
  archiveprefix = {arXiv},
  month         = jul,
  year          = {2012},
  url           = {http://arxiv.org/abs/1207.0580},
  urldate       = {2017-06-13},
  note          = {arXiv: 1207.0580},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Learning, Computer Science - Neural and Evolutionary Computing},
  file          = {arXiv\:1207.0580 PDF:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/8B4CAJCR/Hinton et al. - 2012 - Improving neural networks by preventing co-adaptat.pdf:application/pdf;arXiv.org Snapshot:/home/kohlm/.mozilla/firefox/n3dzy4ns.default/zotero/storage/NG44DJJF/1207.html:text/html}
}