-
Notifications
You must be signed in to change notification settings - Fork 20
/
spider_github_user.py
56 lines (48 loc) · 1.85 KB
/
spider_github_user.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import requests
from bs4 import BeautifulSoup
import pandas as pd
def _tag_text(tag, default='', strip=False):
    """Return the text of a BeautifulSoup tag, or *default* if *tag* is None."""
    if tag is None:
        return default
    return tag.text.strip() if strip else tag.text


def _nav_count(nav_items, index):
    """Return the last space-separated token of the nav item at *index*."""
    return nav_items[index].text.strip().split(' ')[-1]


def get_userinfo_github(username, timeout=10):
    """Scrape profile fields from a GitHub user's HTML profile page.

    The page at ``https://github.com/<username>`` is downloaded and parsed
    with BeautifulSoup. The CSS class names and ``itemprop`` values used
    below are tied to whatever markup the page serves.
    NOTE(review): confirm these selectors still match the live markup.

    Args:
        username: GitHub login; converted with ``str()`` and appended to the
            base URL.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        A list of 15 items, in this order: name, additionalName, bio, org,
        label, email, link_url, count_ogz, year_start, count_repositories,
        count_projects, count_stars, count_followers, count_following,
        count_contributions. ``count_ogz`` is an int; every other item is a
        string. bio, org, label, email, link_url and year_start fall back
        to ``''`` when their element is absent.

    Raises:
        AttributeError: if the name, additionalName or contributions heading
            element is not found in the page.
        IndexError: if fewer than five ``UnderlineNav-item`` links are found.
    """
    user_link = 'https://github.com/' + str(username)
    r = requests.get(user_link, timeout=timeout)
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed, which warns and can parse differently between machines.
    soup = BeautifulSoup(r.text, 'html.parser')

    # Mandatory fields: a missing element raises AttributeError.
    name = soup.find('span', itemprop="name").text
    additionalName = soup.find('span', itemprop="additionalName").text

    # Optional fields: default to '' when the element is absent.
    bio = _tag_text(
        soup.find('div', class_="p-note user-profile-bio js-user-profile-bio mb-3"))
    org = _tag_text(soup.find('span', class_="p-org"))
    label = _tag_text(soup.find('span', class_="p-label"))
    email = _tag_text(soup.find('li', itemprop="email"), strip=True)
    link_url = _tag_text(soup.find('li', itemprop="url"), strip=True)

    # find_all returns an empty list when nothing matches, so len() is safe.
    count_ogz = len(soup.find_all('a', class_="avatar-group-item"))

    # The last matching year link supplies year_start; '' when none exist.
    year_links = soup.find_all('a', class_="js-year-link filter-item px-3 mb-2 py-2 ")
    year_start = year_links[-1].text if year_links else ''

    # Counters are read positionally from the first five nav links; each
    # value is the last space-separated token of the link text.
    lst_line_nav = soup.find_all('a', class_="UnderlineNav-item ")
    (count_repositories, count_projects, count_stars,
     count_followers, count_following) = [
        _nav_count(lst_line_nav, i) for i in range(5)
    ]

    # First space-separated token of the matched <h2> heading's text.
    count_contributions = soup.find(
        'h2', class_="f4 text-normal mb-2").text.strip().split(' ')[0]

    return [name, additionalName, bio, org, label, email, link_url, count_ogz, year_start,
            count_repositories, count_projects, count_stars, count_followers, count_following,
            count_contributions]
if __name__ == '__main__':
    # Demo run: fetch and print the profile fields for a sample account.
    # Guarded so importing this module does not make a network request.
    print(get_userinfo_github('stormstone'))