This repository was archived by the owner on Apr 29, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathCrawler_Start.py
More file actions
66 lines (50 loc) · 1.61 KB
/
Crawler_Start.py
File metadata and controls
66 lines (50 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# This tool was written by @keyiflerolsun | for @KekikAkademi.
from FlaskAPI import konsol, FlaskAPIDB
from Lib import wiki_ver, indeed_ver, glassdoor_ver
from time import sleep
from random import choice
import json

# Companies to crawl come from the settings file.
# Opened read-only ("r", not "r+"): nothing is written back.
with open("SETTINGS.json", "r", encoding="utf-8") as dosya:
    sirketler = json.load(dosya)["Companies"]

# One proxy per line; skip blank lines so `choice(PROXILER)` can never
# return an empty string as a proxy address.
with open("proxies.txt", "r", encoding="utf-8") as dosya:
    PROXILER = [satir.strip() for satir in dosya if satir.strip()]
def sirket_ekle(sirket: str) -> bool:
    """Crawl Wikipedia/Indeed/Glassdoor data for *sirket* and store it in the DB.

    Returns True once the record has been added.
    """
    db = FlaskAPIDB()
    # NOTE(review): duplicate check deliberately disabled — every run re-crawls.
    # if db.data_ver(sirket):
    #     return False
    konsol.log(f"[yellow][💾] {sirket} Crawl Ediliyor")

    # Glassdoor scraping is flaky; on failure fall back to None, but log the
    # error (same style as the __main__ handler) instead of swallowing it.
    try:
        glass_veri = glassdoor_ver(sirket, choice(PROXILER) if PROXILER else None)
    except Exception as hata:
        konsol.log(f"[bold red]{type(hata).__name__} | {hata}[/]")
        glass_veri = None

    veri = {
        "company"   : sirket,
        "wikipedia" : wiki_ver(sirket),
        "indeed"    : indeed_ver(sirket, choice(PROXILER) if PROXILER else None),
        "glassdoor" : glass_veri
    }
    db.ekle(veri)
    konsol.log(f"[green][✅] {sirket} Database'e Eklendi")
    return True
def crawler_func():
    """Run one full crawl pass over every configured company."""
    for firma in sirketler:
        if not sirket_ekle(firma):
            continue
        # Pause between successful inserts to throttle requests.
        sleep(15)
import schedule
# (removed duplicate `from time import sleep` — already imported at the top)

# Re-run the full crawl every 7 days.
schedule.every(7).days.do(crawler_func)

if __name__ == "__main__":
    # Do one crawl immediately; the scheduler covers subsequent runs.
    try:
        crawler_func()
    except Exception as hata:
        konsol.log(f'[bold red]{type(hata).__name__} | {hata}[/]')

    # Keep the process alive and fire due jobs; never let a job crash the loop.
    while True:
        try:
            schedule.run_pending()
        except Exception as hata:
            konsol.log(f"[bold red]{type(hata).__name__} | {hata}[/]")
            continue
        finally:
            # `finally` runs on both paths, so we always wait 1s per tick.
            sleep(1)