# Code:
import cookielib, urllib2, urllib
import re
import sys, os, traceback
import logging
import time
# --- User configuration ---
userid = "YOUR.USERID@EMAIL.WHEREVER"  # Filesonic account e-mail (login form "email" field)
password = "SEEEEEKRIT"  # Filesonic account password
loginURL = "http://www.filesonic.com/user/login"  # POST target for the login form
# One download URL per line; blank lines are skipped by the main loop below
dataURLs = """
http://www.filesonic.com/file/151801321/US_TDY_2011-03-02.rar
"""
def dump_cookies(cookieJar):
    # Log every HTTP cookie held in a CookieJar at DEBUG level.
    #
    # cookieJar = the CookieJar object whose cookies are listed
    logging.debug("Cookies:")
    for cookie in cookieJar:
        logging.debug("%s = '%s'", cookie.name, cookie.value)
def login_filesonic(userid, password):
    # Log in to Filesonic with the given credentials; the auth cookie ends up
    # in the opener's CookieJar so later requests through it are authenticated.
    #
    # userid = filesonic account userid
    # password = filesonic account password
    #
    # returns an urllib2.OpenerDirector object
    logging.debug("Preparing login data...")
    formFields = {
        "email": userid,
        "redirect": "/",
        "password": password,
        "rememberMe": "1",
    }
    encodedLoginData = urllib.urlencode(formFields)
    logging.debug("Generated encoded login data = '%s'", encodedLoginData)
    logging.debug("Creating urllib2 opener...")
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    logging.info("Opening login URL, '%s'...", loginURL)
    opener.open(loginURL, encodedLoginData)
    dump_cookies(jar)
    return opener
def byteSizeString(byteCount):
    # Render a byte count in a convenient human-readable form: "X.XX GB",
    # "X.XX MB", "X.XX kB", or "N bytes", using the largest unit that the
    # count strictly exceeds.
    #
    # byteCount = number of bytes of data
    #
    # returns the formatted string
    units = (
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "kB"),
    )
    for threshold, label in units:
        if byteCount > threshold:
            return "%.2f %s" % (float(byteCount) / float(threshold), label)
    return "%d bytes" % (byteCount)
def timeString(seconds):
    # Render a duration in a convenient human-readable form: "H:MM:SS",
    # "M:SS", or "S sec", depending on the duration's length.
    #
    # seconds = number of seconds (fractions are truncated)
    #
    # returns the formatted string
    total = int(seconds)
    mins, secs = divmod(total, 60)
    hours, mins = divmod(mins, 60)
    if hours > 0:
        return "%d:%02d:%02d" % (hours, mins, secs)
    if mins > 0:
        return "%d:%02d" % (mins, secs)
    return "%d sec" % (secs)
def dump_headers(headers):
    # Log a list of raw header lines at DEBUG level, one numbered line each,
    # with trailing CR/LF characters removed.
    #
    # headers = the list of header strings to log (presumably from a
    #           mimetools.Message object returned by the info() method of a
    #           urllib2.open() response object)
    logging.debug("Headers:")
    for i, h in enumerate(headers):
        # rstrip strips any mix of trailing CR/LF in one pass and, unlike the
        # previous char-by-char loop, does not raise IndexError on an empty
        # string header
        h = h.rstrip("\r\n")
        # lazy %-args so formatting is skipped when DEBUG is disabled
        logging.debug("%d: %s", i, h)
def fetch_data_URL(opener, dataURL):
    # Retrieve and save a download URL. The file is saved in the current
    # directory, named according to the filename in the response's
    # Content-Disposition header. Progress is reported in place on one
    # console line while downloading.
    #
    # opener = an urllib2.OpenerDirector object with an authorization cookie in the contained CookieJar
    # dataURL = the download URL to fetch
    #
    # raises Exception (or TypeError/AttributeError) when the response headers
    # are missing or unexpected, which usually means the file was removed
    logging.info("Fetching data page '%s'...", dataURL)
    page = opener.open(dataURL)
    logging.debug("Response URL = '%s'", page.geturl())
    logging.info("Successfully received headers; processing...")
    metadata = page.info()
    dump_headers(metadata.headers)
    # Check the returned headers; this could throw an exception if
    # Content-Length or Content-Disposition are missing (int(None) /
    # re.search(None) both raise). If the headers are not as expected, the
    # file has probably been removed from the service.
    contentType = metadata.getheader("Content-Type")
    contentTransferEncoding = metadata.getheader("Content-Transfer-Encoding")
    contentDisposition = metadata.getheader("Content-Disposition")
    contentLength = int(metadata.getheader("Content-Length"))
    # Output filename comes from the quoted filename="..." token; if the
    # pattern does not match, matches is None and .group raises AttributeError
    matches = re.search(r'filename="(.+)"', contentDisposition)
    filename = matches.group(1)
    logging.debug("Content Type = %s", contentType)
    logging.debug("Content Length = %s", contentLength)
    logging.debug("Content Transfer Encoding = %s", contentTransferEncoding)
    logging.debug("Content Disposition = %s", contentDisposition)
    logging.debug("filename = %s", filename)
    # Some servers wrap the content type in literal double quotes; strip them
    if (contentType[0] == '"' and contentType[-1] == '"'):
        contentType = contentType[1:-1]
        logging.debug("Trimmed Content Type = %s", contentType)
    # Only raw binary downloads are expected; anything else (e.g. an HTML
    # error page) aborts this URL
    if (contentType != "application/octet-stream"):
        logging.error("Unknown content type, '%s'; skipping URL", contentType)
        raise Exception("Unknown content type, '%s'" % (contentType))
    if (contentTransferEncoding != "binary" and contentTransferEncoding != None):
        logging.error("Unknown transfer encoding, '%s'; skipping URL", contentTransferEncoding)
        raise Exception("Unknown transfer encoding, '%s'" % (contentTransferEncoding))
    logging.info("Opening output data file, '%s'...", filename)
    dataFile = open(filename, "wb")
    logging.info("Fetching %d bytes of data...", contentLength)
    # We print a lot of progress info, which requires a lot of state variables...
    rateTuples = [ ]  # sliding window of (whole-second timestamp, bytes read so far)
    startTime = time.time()
    prevTimeInt = int(startTime)
    bytesRead = 0
    avgRate = 0.0  # bytes/sec averaged over the sliding window
    currentRateStr = "0.0 kB/s"
    overallRateStr = "0.0 kB/s"
    totalTimeStr = "0sec"
    timeLeftStr = "0sec"
    contentLengthStr = byteSizeString(contentLength)
    prevStatusLineLength = 0  # for blanking leftovers of a longer previous status line
    while (bytesRead < contentLength):
        # Read and save the next chunk of bytes (16 kB, or whatever remains)
        byteCount = 16384
        if (contentLength - bytesRead < byteCount):
            byteCount = contentLength - bytesRead
        dataChunk = page.read(byteCount)
        dataFile.write(dataChunk)
        bytesRead += len(dataChunk)
        bytesReadStr = byteSizeString(bytesRead)
        percentCompleteStr = "%.1f" % (float(bytesRead * 100.0) / contentLength)
        # Calculate elapsed time; only recalc the download rates once per second
        currentTime = time.time()
        currentTimeInt = int(currentTime)
        if (currentTimeInt > prevTimeInt):
            # Maintain a window of the last 10 seconds
            if (len(rateTuples) >= 10):
                del rateTuples[0]
            timeTuple = (currentTimeInt, bytesRead)
            rateTuples.append(timeTuple)
            prevTimeInt = currentTimeInt
            if (len(rateTuples) >= 2):
                # Current rate = byte delta / time delta across the window endpoints
                first = rateTuples[0]
                last = rateTuples[-1]
                timeDelta = last[0] - first[0]
                byteDelta = last[1] - first[1]
                avgRate = float(byteDelta) / float(timeDelta)
                currentRateStr = "%.2f kB/s" % (float(avgRate) / 1024.0)
            # How much data is left to read?
            bytesLeft = contentLength - bytesRead
            if (avgRate > 0.0):
                timeLeft = int(bytesLeft / avgRate)
                timeLeftStr = timeString(timeLeft)
            totalTime = currentTime - startTime
            totalTimeStr = timeString(totalTime)
            if (totalTime > 0):
                overallRate = (float(bytesRead) / 1024.0) / totalTime
                overallRateStr = "%.2f kB/s" % (overallRate)
        # Format and print the status line, taking care to erase junk at the end if necessary
        statusLine = "Read %s / %s = %s%% ; current rate = %s, overall rate = %s; elapsed time = %s, time remaining = %s" % (bytesReadStr, contentLengthStr, percentCompleteStr, currentRateStr, overallRateStr, totalTimeStr, timeLeftStr)
        newStatusLineLength = len(statusLine)
        if (newStatusLineLength < prevStatusLineLength):
            statusLine += " "*(prevStatusLineLength - newStatusLineLength)
        prevStatusLineLength = newStatusLineLength
        # Trailing comma suppresses the newline so the line redraws in place
        print "\r",statusLine,
        sys.stdout.flush()
    print
    logging.info("Closing data file....")
    # flush + fsync pushes the data to disk before close; os.fsync accepts the
    # file object because it has a fileno() method
    dataFile.flush()
    os.fsync(dataFile)
    dataFile.close()
#logging.getLogger().setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.INFO)
opener = login_filesonic(userid, password)
for l in dataURLs.splitlines():
if (len(l) > 0):
logging.info("Processing URL '%s'", l)
try:
fetch_data_URL(opener, l)
#print "pass...."
except KeyboardInterrupt:
logging.error("Caught KeyboardInterrupt exception; halting....")
break
except:
exceptionType = str(sys.exc_info()[0])
exceptionValue = str(sys.exc_info()[1])
exceptionTrace = traceback.format_exc()
logging.error("An exception occurred while fetching URL '%s'", l)
logging.error("Exception type = %s", exceptionType)
logging.error("Exception value = %s", exceptionValue)
logging.error("Exception trace = %s", exceptionTrace)
logging.error("Skipping URL, '%s'.", l)
logging.info("----------------------------------")
print "Exiting."
# Bookmarks