-
Notifications
You must be signed in to change notification settings - Fork 1
/
gen.py
82 lines (65 loc) · 2.3 KB
/
gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#gen.py
# Make HTTP requests
import requests
# Scrape data from an HTML document
from bs4 import BeautifulSoup
# I/O
import os
# Search and manipulate strings
import re
GENIUS_API_TOKEN = client_access_token = 'q2iKacoJeJRs5q6UC1AnYWN4IriZnwgV4FhoXWxIERxORuOb9GxUdAbWDD_K1D5q'
# Get artist object from Genius API
def request_artist_info(artist_name, page):
base_url = 'https://api.genius.com'
headers = {'Authorization': 'Bearer ' + GENIUS_API_TOKEN}
search_url = base_url + '/search?per_page=10&page=' + str(page)
data = {'q': artist_name}
response = requests.get(search_url, data=data, headers=headers)
return response
# Get Genius.com song url's from artist object
def request_song_url(artist_name, song_cap):
page = 1
songs = []
while True:
response = request_artist_info(artist_name, page)
json = response.json()
# print("JJJ",json)
# Collect up to song_cap song objects from artist
song_info = []
for hit in json['response']['hits']:
if artist_name.lower() in hit['result']['primary_artist']['name'].lower():
song_info.append(hit)
print("HHHHHH:",hit)
# Collect song URL's from song objects
for song in song_info:
if (len(songs) < song_cap):
url = song['result']['url']
songs.append(url)
if (len(songs) == song_cap):
break
else:
page += 1
print('Found {} songs by {}'.format(len(songs), artist_name))
return songs
# DEMO
# print("!!!!!!",request_song_url('Lana Del Rey', 2))
# Scrape lyrics from a Genius.com song URL
def scrape_song_lyrics(url):
page = requests.get(url)
print(page)
html = BeautifulSoup(page.text, 'html.parser')
lyrics = html.find('div', class_='lyrics').get_text()
print(lyrics)
#remove identifiers like chorus, verse, etc
lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
#remove empty lines
lyrics = os.linesep.join([s for s in lyrics.splitlines() if s])
return lyrics
# DEMO
# print(scrape_song_lyrics('https://genius.com/Lana-del-rey-young-and-beautiful-lyrics'))
def searchLyrics(search, res = 1):
urls = request_song_url(search, res)
print(urls)
return scrape_song_lyrics(urls[0])
item = "hey ya"
searchLyrics(item)