-
Notifications
You must be signed in to change notification settings - Fork 0
/
httpauth.py
68 lines (61 loc) · 2.03 KB
/
httpauth.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python
import base64
import io
import logging
import re
import urllib2

from bs4 import BeautifulSoup
class WebsiteModul(object):
    """Fetch a page behind HTTP Basic auth and download every matching link.

    Constructing an instance immediately requests ``url``, collects all
    ``<a>`` tags whose ``href`` matches ``regex`` and passes each ``href`` to
    :meth:`download`, which writes the file into the current working
    directory.

    The constructor arguments are stored unchanged as ``self.url``,
    ``self.username``, ``self.password`` and ``self.regex``.
    """

    # Prefix that download() prepends to every href before requesting it.
    BASE_URL = "http://www.pi2.uni-stuttgart.de/cms/"
    # Number of bytes read from the HTTP response per loop iteration.
    BLOCK_SIZE = 8192

    def __init__(self, url, username, password, regex):
        """Scrape ``url`` and download every link whose href matches ``regex``.

        Args:
            url: Address of the page to scan for links.
            username: User name for HTTP Basic authentication.
            password: Password for HTTP Basic authentication.
            regex: Pattern (string) applied to the ``href`` of each ``<a>`` tag.

        ``urllib2.URLError`` is never propagated: a failure to fetch the page
        is logged and ends the run; a failure to download one link is logged
        and the remaining links are still processed.
        """
        self.url = url
        self.username = username
        self.password = password
        self.regex = regex

        try:
            page = urllib2.urlopen(
                self._authorized_request(url, username, password))
            try:
                soup = BeautifulSoup(page, "html.parser")
            finally:
                page.close()
        except urllib2.URLError as err:
            logging.warning("Could not fetch %s: %s", url, err)
            return

        # Compile once; the same pattern filters every anchor tag.
        href_pattern = re.compile(regex)
        for tag in soup.find_all('a', attrs={'href': href_pattern}):
            href = tag['href']
            try:
                self.download(href, password, username)
            except urllib2.URLError as err:
                # Best effort: one broken link must not abort the others.
                logging.warning("Could not download %s: %s", href, err)

    @staticmethod
    def _basic_auth_header(username, password):
        """Return the ``Authorization`` header value for HTTP Basic auth."""
        credentials = '%s:%s' % (username, password)
        if not isinstance(credentials, bytes):
            # Text credentials must become bytes before base64 encoding.
            credentials = credentials.encode('utf-8')
        # b64encode never inserts line breaks, so no newline stripping needed.
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            token = token.decode('ascii')
        return "Basic %s" % token

    @classmethod
    def _authorized_request(cls, url, username, password):
        """Build a ``urllib2.Request`` for ``url`` carrying Basic credentials."""
        request = urllib2.Request(url)
        request.add_header(
            "Authorization", cls._basic_auth_header(username, password))
        return request

    @staticmethod
    def _content_length(meta):
        """Return the declared Content-Length as int, or None if unusable."""
        values = meta.getheaders("Content-Length")
        if not values:
            return None
        try:
            return int(values[0])
        except ValueError:
            return None

    def download(self, url, password, username):
        """Download ``BASE_URL + url`` into the current working directory.

        Args:
            url: Link target; it is appended to ``BASE_URL`` as-is.
            password: Password for HTTP Basic authentication.
            username: User name for HTTP Basic authentication.

        The local file name is the part of the resulting URL after the last
        ``/``. Progress is printed after every block that is written.

        Raises:
            urllib2.URLError: If the request fails.
        """
        url = self.BASE_URL + url
        file_name = url.split('/')[-1]

        response = urllib2.urlopen(
            self._authorized_request(url, username, password))
        try:
            # The server may omit Content-Length; then no percentage is shown.
            file_size = self._content_length(response.info())
            shown_size = "unknown" if file_size is None else file_size
            print("Downloading: %s Bytes: %s" % (file_name, shown_size))

            # Download loop adapted from
            # https://stackoverflow.com/questions/22676/how-do-i-download-a-file-over-http-using-python
            file_size_dl = 0
            with open(file_name, 'wb') as out:
                while True:
                    chunk = response.read(self.BLOCK_SIZE)
                    if not chunk:
                        break
                    file_size_dl += len(chunk)
                    out.write(chunk)
                    if file_size:
                        status = r"%10d [%3.2f%%]" % (
                            file_size_dl, file_size_dl * 100. / file_size)
                    else:
                        # Unknown or zero declared size: byte count only.
                        status = "%10d" % file_size_dl
                    status = status + chr(8) * (len(status) + 1)
                    print(status)
        finally:
            response.close()
# NOTE: "username" and "password" below are placeholders standing in for the
# real credentials, which are hidden.
# The pattern selects hrefs containing a capitalised name followed by ".pdf";
# the previous "[pdf]" was a character class and also matched e.g. ".php".
test = WebsiteModul(
    "http://www.pi2.uni-stuttgart.de/cms/index.php?article_id=207",
    "username",
    "password",
    r"[A-Z]\w+[.]+pdf",
)