/*
 * This sample program is ported to C# from examples\dnn_face_recognition_ex.cpp.
 */

using System;
using System.Collections.Generic;
using System.Linq;
using DlibDotNet;

namespace DnnFaceRecognition
{

    internal class Program
    {

        private static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Run this example by invoking it like this: ");
                Console.WriteLine(" ./DnnFaceRecognition faces/bald_guys.jpg");
                Console.WriteLine("You will also need to get the face landmarking model file as well as ");
                Console.WriteLine("the face recognition model file. Download and then decompress these files from: ");
                Console.WriteLine("http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2");
                Console.WriteLine("http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2");
                return;
            }

            // The first thing we are going to do is load all our models. First, since we need to
            // find faces in the image we will need a face detector:
            using (var detector = FrontalFaceDetector.GetFrontalFaceDetector())
            // We will also use a face landmarking model to align faces to a standard pose
            // (see face_landmark_detection_ex.cpp for an introduction):
            using (var sp = new ShapePredictor("shape_predictor_5_face_landmarks.dat"))
            // And finally we load the DNN responsible for face recognition.
            using (var net = DlibDotNet.Dnn.LossMetric.Deserialize("dlib_face_recognition_resnet_model_v1.dat"))

            using (var img = Dlib.LoadImage<RgbPixel>(args[0]))
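            // Also wrap the loaded image in a Matrix; Dlib.ExtractImageChip below reads the aligned face chips from it.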
            using (var mat = new Matrix<RgbPixel>(img))

            // Display the raw image on the screen
            using (var win = new ImageWindow(img))
            {
                // Run the face detector on the image of our action heroes, and for each face extract a
                // copy that has been normalized to 150x150 pixels in size and appropriately rotated
                // and centered.
                var faces = new List<Matrix<RgbPixel>>();
                foreach (var face in detector.Detect(img))
                {
                    var shape = sp.Detect(img, face);
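                    // 150 is the output chip size in pixels and 0.25 is the padding around the face,
                    // which is the geometry the dlib face recognition network was trained on.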
                    var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                    var faceChip = Dlib.ExtractImageChip<RgbPixel>(mat, faceChipDetail);

                    faces.Add(faceChip);

                    // Also put some boxes on the faces so we can see that the detector is finding
                    // them.
                    win.AddOverlay(face);
                }

                if (!faces.Any())
                {
                    Console.WriteLine("No faces found in image!");
                    return;
                }

                // This call asks the DNN to convert each face image in faces into a 128D vector.
                // In this 128D vector space, images from the same person will be close to each other
                // but vectors from different people will be far apart. So we can use these vectors to
                // identify if a pair of images are from the same person or from different people.
                var faceDescriptors = net.Operator(faces);

                // In particular, one simple thing we can do is face clustering. This next bit of code
                // creates a graph of connected faces and then uses the Chinese whispers graph clustering
                // algorithm to identify how many people there are and which faces belong to whom.
                var edges = new List<SamplePair>();
                for (uint i = 0; i < faceDescriptors.Count; ++i)
                {
                    for (var j = i; j < faceDescriptors.Count; ++j)
                    {
                        // Faces are connected in the graph if they are close enough. Here we check if
                        // the distance between two face descriptors is less than 0.6, which is the
                        // decision threshold the network was trained to use, although you can
                        // certainly use any other threshold you find useful.
                        var diff = faceDescriptors[i] - faceDescriptors[j];
                        if (Dlib.Length(diff) < 0.6)
                            edges.Add(new SamplePair(i, j));
                    }
                }

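                // 100 is the number of iterations the Chinese whispers algorithm runs for; the outputs
                // are how many clusters it found and a per-face cluster label.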
                Dlib.ChineseWhispers(edges, 100, out var numClusters, out var labels);

                // This will correctly indicate that there are 4 people in the image.
                Console.WriteLine($"number of people found in the image: {numClusters}");

                // Now let's display the face clustering results on the screen. You will see that it
                // correctly grouped all the faces.
                var winClusters = new List<ImageWindow>();
                for (var i = 0; i < numClusters; i++)
                    winClusters.Add(new ImageWindow());
                var tileImages = new List<Matrix<RgbPixel>>();
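                // For each cluster, gather the faces assigned to it, tile them into one image, and show
                // that tile in the cluster's own window.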
                for (var clusterId = 0ul; clusterId < numClusters; ++clusterId)
                {
                    var temp = new List<Matrix<RgbPixel>>();
                    for (var j = 0; j < labels.Length; ++j)
                    {
                        if (clusterId == labels[j])
                            temp.Add(faces[j]);
                    }

                    winClusters[(int)clusterId].Title = $"face cluster {clusterId}";
                    var tileImage = Dlib.TileImages(temp);
                    tileImages.Add(tileImage);
                    winClusters[(int)clusterId].SetImage(tileImage);
                }

                // Finally, let's print one of the face descriptors to the screen.
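                // Trans transposes the 128x1 column vector so it prints as a single row.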
                using (var trans = Dlib.Trans(faceDescriptors[0]))
                {
                    Console.WriteLine($"face descriptor for one face: {trans}");

                    // It should also be noted that face recognition accuracy can be improved if jittering
                    // is used when creating face descriptors. In particular, to get 99.38% on the LFW
                    // benchmark you need to use the jitter_image() routine to compute the descriptors,
                    // like so:
                    var jitterImages = JitterImage(faces[0]).ToArray();
                    var ret = net.Operator(jitterImages);
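                    // Stack the descriptors of the jittered copies into one matrix and average them,
                    // giving a single, more robust descriptor for the face.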
                    using (var m = Dlib.Mat(ret))
                    using (var faceDescriptor = Dlib.Mean<float>(m))
                    using (var t = Dlib.Trans(faceDescriptor))
                    {
                        Console.WriteLine($"jittered face descriptor for one face: {t}");

                        // If you use the model without jittering, as we did when clustering the bald guys, it
                        // gets an accuracy of 99.13% on the LFW benchmark. So jittering makes the whole
                        // procedure a little more accurate but makes face descriptor calculation slower.

                        Console.WriteLine("hit enter to terminate");
                        Console.ReadKey();

                        foreach (var jitterImage in jitterImages)
                            jitterImage.Dispose();

                        foreach (var tileImage in tileImages)
                            tileImage.Dispose();

                        foreach (var edge in edges)
                            edge.Dispose();

                        foreach (var descriptor in faceDescriptors)
                            descriptor.Dispose();

                        foreach (var face in faces)
                            face.Dispose();
                    }
                }
            }
        }

        private static IEnumerable<Matrix<RgbPixel>> JitterImage(Matrix<RgbPixel> img)
        {
            // All this function does is make 100 copies of img, all slightly jittered by being
            // zoomed, rotated, and translated a little bit differently. They are also randomly
            // mirrored left to right.
            var rnd = new Rand();

            var crops = new List<Matrix<RgbPixel>>();
            for (var i = 0; i < 100; ++i)
                crops.Add(Dlib.JitterImage(img, rnd));

            return crops;
        }

    }

}