-
Notifications
You must be signed in to change notification settings - Fork 0
/
webscraping.py
31 lines (26 loc) · 918 Bytes
/
webscraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# -*- coding: utf-8 -*-
"""webscraping.ipynb

Scrape headline text and links from the BBC News front page and save
them to ``headlines.csv`` (two columns: Headline, URL).

Automatically generated by Colab.
Original file is located at
    https://colab.research.google.com/drive/1iXtl75NZJUJv3ZVMS_EKiASBgSm9fc8E
"""
from bs4 import BeautifulSoup
import requests
import csv

url = "https://www.bbc.com/news"

# Fetch the content from url. A timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() surfaces HTTP
# errors (404/500/...) instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content

# Parse HTML content
soup = BeautifulSoup(html, 'html.parser')

# Find elements containing headlines. <h2> tags carry headline text;
# <a> tags carry links. Note <h2> elements have no href attribute.
elements = soup.find_all(["h2", "a"])

# Create a CSV file and write the data
with open('headlines.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Headline', 'URL'])
    for element in elements:
        headline_text = element.text.strip()
        # Tag.get() returns None when the attribute is absent (every
        # <h2>); write an empty string instead of the literal "None"
        # so the URL column stays clean.
        headline_url = element.get('href') or ''
        writer.writerow([headline_text, headline_url])

print("Data scraping complete and saved to headlines.csv.")