-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebScraper.java
More file actions
38 lines (36 loc) · 1.48 KB
/
WebScraper.java
File metadata and controls
38 lines (36 loc) · 1.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Date;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line scraper that fetches the page at {@code http://quotes.toscrape.com},
 * extracts the text of every element matching {@code .quote span.text}, and writes
 * the results to {@code quotes.csv} in the working directory.
 *
 * <p>The CSV has the columns {@code S.No,Quote,ScrapedAt}. The file is always
 * written as UTF-8 so that non-ASCII characters in quotes are preserved
 * regardless of the platform default charset.
 */
public class WebScraper {

    /** Header row of the generated CSV file, including its line terminator. */
    private static final String CSV_HEADER = "S.No,Quote,ScrapedAt\n";

    /**
     * Runs the scrape and writes the CSV file.
     *
     * <p>On success a confirmation is printed to standard output. On any I/O
     * failure (network or file) a message is printed to standard error and the
     * JVM exits with status 1 so that shells and schedulers can detect the failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = "http://quotes.toscrape.com"; // target website
        String csvFilePath = "quotes.csv"; // output CSV file

        try {
            // Fetch and parse BEFORE opening the output file: opening truncates it,
            // so a failed download must not wipe the result of an earlier run.
            Document doc = Jsoup.connect(url).get();
            Elements quotes = doc.select(".quote span.text");

            // All rows come from the single fetch above, so they share one timestamp.
            // Date.toString() is kept to preserve the existing ScrapedAt column format.
            String scrapedAt = new Date().toString();

            try (BufferedWriter writer =
                    Files.newBufferedWriter(Paths.get(csvFilePath), StandardCharsets.UTF_8)) {
                writer.append(CSV_HEADER);

                int rowCount = 1;
                for (Element quote : quotes) {
                    writer.append(String.valueOf(rowCount))
                            .append(',')
                            .append(toCsvField(quote.text()))
                            .append(',')
                            .append(scrapedAt)
                            .append('\n');
                    rowCount++;
                }
            }

            // Reported only after the writer has been closed, i.e. the data is flushed.
            System.out.println("Scraping completed! Data saved to " + csvFilePath);
        } catch (IOException e) {
            System.err.println(
                    "Scraping failed (" + url + " -> " + csvFilePath + "): " + e);
            System.exit(1);
        }
    }

    /**
     * Converts raw quote text into a double-quoted CSV field.
     *
     * <p>Typographic (curly) double and single quotes are first replaced by their
     * straight ASCII equivalents; every double quote is then doubled and the whole
     * value is wrapped in double quotes, so embedded commas and quotes stay inside
     * the field.
     *
     * @param text the quote text as extracted from the page
     * @return the text as a quoted CSV field
     */
    private static String toCsvField(String text) {
        // Unicode escapes are used instead of literal curly quotes so the result
        // does not depend on the encoding the compiler assumes for this file.
        String normalized = text
                .replace('\u201C', '"')   // left double quotation mark
                .replace('\u201D', '"')   // right double quotation mark
                .replace('\u2019', '\'')  // right single quotation mark
                .replace('\u2018', '\''); // left single quotation mark
        return "\"" + normalized.replace("\"", "\"\"") + "\"";
    }
}