// LogosScrapper.ts
// Forked from filippofilip95/car-logos-dataset.
import { Manufacturer, Manufacturers, ManufacturersLogos } from "./types";
import BaseScrapper from "./BaseScrapper";
/** Origin that every scraped page path and logo path is resolved against. */
const BASE_URL = "https://www.carlogos.org";

/** Page URLs requested by the scraper. */
const Url = {
  /** Index page whose anchors list the manufacturers. */
  AllManufacturers: `${BASE_URL}/car-brands-a-z`,

  /**
   * Builds the absolute URL of a manufacturer page from a scraped `href`.
   *
   * The `href` comes straight from the page markup, so it may be relative
   * (`foo/bar.html`), root-relative (`/foo/bar.html`), protocol-relative
   * (`//host/foo`) or already absolute. Each form is normalised so the
   * result never contains a doubled slash after the origin and an absolute
   * link is never prefixed with `BASE_URL`.
   *
   * @param url - Value of the anchor's `href` attribute.
   * @returns An absolute URL.
   */
  Manufacturer: (url: string): string => {
    // Already absolute: use as-is.
    if (/^https?:\/\//i.test(url)) {
      return url;
    }
    // Protocol-relative: inherit the scheme used by BASE_URL.
    if (url.startsWith("//")) {
      return `https:${url}`;
    }
    // Relative or root-relative: join with exactly one slash.
    return `${BASE_URL}/${url.replace(/^\/+/, "")}`;
  },
};

/** Selectors used to pull data out of the loaded pages. */
const Selectors = {
  /** Anchors on the index page, one per manufacturer. */
  AllManufacturers: ".a-z dd a",
  /** Logo image on a manufacturer page. */
  ManufacturerLogo: `div.logo-content a img`,
};
/**
 * Scraper that collects manufacturer links from the index page and downloads
 * each manufacturer's logo image into `./logos/`.
 *
 * The helpers accessed on `this` but not defined here (`loadDocument`,
 * `queue`, `chalk`, `slugify`, `getFileExtension`, `downloadFile`) are
 * expected to come from `BaseScrapper`.
 */
class LogosScrapper extends BaseScrapper {
  /** Manufacturers found on the index page; appended to by `recognizeManufacturers`. */
  manufacturers: Manufacturers = [];

  /** One entry per logo whose download completed without throwing. */
  manufacturersLogos: ManufacturersLogos = [];

  /**
   * Turns a scraped image path into an absolute URL.
   *
   * @param url - Value of the image's `src` attribute.
   * @returns `url` unchanged when it starts with `http`, otherwise `url`
   *   appended directly to `BASE_URL` (so a site-relative `url` is assumed
   *   to start with `/`).
   */
  private fixUrl(url: string): string {
    return url.startsWith("http") ? url : `${BASE_URL}${url}`;
  }

  /**
   * Loads the index page and appends every anchor that has both an `href`
   * and non-empty text to `this.manufacturers`.
   */
  private async recognizeManufacturers(): Promise<void> {
    const document = await this.loadDocument(Url.AllManufacturers);
    const anchors = document(Selectors.AllManufacturers);

    anchors.each((index, element) => {
      const manufacturerNode = document(element);
      const url = manufacturerNode.attr("href");
      const name = manufacturerNode.text();

      // Skip anchors that lack a link target or a visible name.
      if (url && name) {
        this.manufacturers.push({ name, url });
      }
    });
  }

  /**
   * Queues one download job per recognized manufacturer and waits for the
   * queue to finish.
   *
   * The queue is created with `concurrency: 5` (presumably the maximum
   * number of jobs run at once; see the queue implementation provided by
   * `BaseScrapper`).
   *
   * @returns A promise that resolves when the queue reports completion and
   *   rejects with the error the queue passes to its `start` callback.
   */
  private async downloadLogos(): Promise<void> {
    const queue = new this.queue({ concurrency: 5 });

    this.manufacturers
      .map(this.createLogoDownloader)
      .forEach((runner) => queue.push(runner));

    return new Promise((resolve, reject) =>
      queue.start((error) => {
        if (error) {
          reject(error);
          // Stop here so the failure path does not also call resolve().
          return;
        }
        resolve();
      })
    );
  }

  /**
   * Creates the queue job that downloads a single manufacturer's logo.
   *
   * Declared as an arrow-function property so it keeps `this` when passed
   * directly to `Array.prototype.map`.
   *
   * @param manufacturer - Name and page link of the manufacturer.
   * @returns An async job. The job never rejects: any failure (page load,
   *   missing logo, download) is logged and swallowed so that one bad
   *   manufacturer does not abort the rest.
   */
  private createLogoDownloader = (manufacturer: Manufacturer) => {
    return async () => {
      try {
        const msg = `Logo of ${this.chalk.bold(manufacturer.name)} `;
        const document = await this.loadDocument(
          Url.Manufacturer(manufacturer.url)
        );

        const logoUrl = document(Selectors.ManufacturerLogo).attr("src");
        if (!logoUrl) {
          throw new Error(`${msg} ${this.chalk.red("not found")}`);
        }

        const extension = this.getFileExtension(logoUrl);
        const url = this.fixUrl(logoUrl);
        const slug = this.slugify(manufacturer.name).toLowerCase();

        await this.downloadFile(url, `./logos/${slug}.${extension}`);

        // NOTE(review): the recorded url is the raw scraped `src`, not the
        // absolute `url` used for the download — confirm this is intended.
        this.manufacturersLogos.push({ name: manufacturer.name, url: logoUrl });
        console.log(`${msg} ${this.chalk.green("downloaded")}.`);
      } catch (e) {
        console.log(e);
      }
    };
  };

  /**
   * Runs the whole scrape: recognizes manufacturers, downloads their logos
   * and logs progress. Errors are logged with `console.error` rather than
   * propagated, so the returned promise always resolves.
   */
  public async run() {
    try {
      console.log("Started parsing.");

      await this.recognizeManufacturers();
      console.log(`Recognized ${this.manufacturers.length} manufacturers.`);

      await this.downloadLogos();
      console.log(
        `Downloaded ${this.chalk.bold(
          this.manufacturersLogos.length
        )} manufacturers logos.`
      );

      // TODO save to json
      console.log("Finished.");
    } catch (e) {
      console.error(e);
    }
  }
}
export default LogosScrapper;