In questo esempio si vede come lavora il parser HTML integrato in Python
Questo programmino serve per alimentare la presente Wiki con documentazioni varie provenienti da HowToForge.
Nell'esempio sono state usate solo librerie integrate in Python e la versione usata è la 2.7.x
#!/usr/bin/python
# ###############################################################################################
# HowToForge to Wiki converter
# ###############################################################################################
# Notes   : Originaly developed in Embarcadero Delphi 2007 ... Ported to python for cross-platform
#           compatibility
# Date    : 20121128
# Author  : Ascanio Pressato
# CoAuthor: Massimo Fioravanti
# Lic.    : Creative Commen
# ###############################################################################################
import urllib2
import sys
import re
import os
import argparse
import textwrap
import base64
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint

# --- DokuWiki markup constants ---------------------------------------------------------------
KBeginH1 = '===== '
KEndH1 = ' ====='
KBeginH2 = '==== '
KEndH2 = ' ===='
KBeginH3 = '=== '
KEndH3 = ' ==='
KBeginH4 = '== '
KEndH4 = ' =='
KBeginH5 = '= '
KEndH5 = ' ='
KBold = '**'
KItalic = '//'
KUnderline = '__'
KBeginStrike = '<del>'
KEndStrike = '</del>'
KCodeStr = "''"
KBeginLink = '[['
KEndLink = ']]'
KNumList = ' - '
KSymbolList = ' * '
KLine = '----'
KBeginKey = '<key>'
KEndKey = '</key>'
KBeginBox = '<box>'
KEndBox = '</box>'
KBeginFile = '<file>'
KEndFile = '</file>'
KBeginCode = '<code>'
# NOTE(review): the literals below were eaten by the wiki renderer during extraction;
# they are reconstructed from standard DokuWiki syntax -- confirm against a working copy.
KEndCode = '</code>'
KBeginNote = '<note>'
KEndNote = '</note>'          # referenced by MyHTMLParser.handle_endtag
KBeginImage = '{{'            # DokuWiki image syntax: {{ns:file|caption}}
KEndImage = '}}'              # referenced by MyHTMLParser but lost in the garbled source
KParagraph = r'\\'            # DokuWiki forced line break -- reconstructed, TODO confirm
KSearchBeginTitle = r"(<title>)"
KSearchEndTitle = r"(<\/title>)"
KInvalidUrlChar = ' :.,\/+*'
KValidUrlChar = ''.ljust(len(KInvalidUrlChar), '_')   # one '_' per invalid character
# HTML landmarks delimiting the article body on howtoforge.com pages
KBeginData = "<!-- begin content -->"
KEndData = "<!-- end content -->"           # NOTE(review): reconstructed -- original literal lost
KEndData2 = "<!-- AddThis Button END -->"
KEndData3 = "<!-- end content -->"          # NOTE(review): reconstructed; used by CleanHTML -- TODO confirm

# --- Global converter state ------------------------------------------------------------------
OutPutWiki = []      # accumulated wiki output (list of fragments, joined later)
SlClassCSS = []      # stack of (tag, css-class) pairs for currently open tags
CodeMode = False
IsScript = False
IsNote = False
IsCmd = False
OkTrim = False
KBaseUrl = 'http://www.howtoforge.com/'
CreditsString = ""
CreditsInfo = []
canWork = False
PassCount = 0
PagesList = []
KCreditsHeader = 'Revisioni'
BeginPage = -1
EndPage = -1
KVersion = "2.9 Python"
KAuthor = "QXNjYW5pbyBQcmVzc2F0bw"      # base64-encoded author name
KCoAuthor = "TWFzc2ltbyBGaW9yYXZhbnRp"  # base64-encoded co-author name

# Transcode Table
# <p class="command"></p> => <code> block
# <p class="system"></p>  => Italic
# <p></p>                 => Paragraph
# <pre></pre>             => <file> block
# <br>                    => Paragraph
# <a href="">text</a>     => [[url|text]]
#
# Sample for Credits Extraction:
# Submitted by <a href="forums/member.php?u=2" title="View user profile." rel="nofollow">falko</a>
# (<a href="forums/private.php?do=newpm&u=2" ...>Contact Author</a>) (<a href="forums" ...>Forums</a>)
# on Sun, 2012-10-21 17:33. :: <a href="sitemap/control-panels/ispconfig">ISPConfig</a> | ...
# ###############################################################################################
# VCL
# ###############################################################################################
def valueOf(List, Name):
    """Return the value paired with *Name* in a list of (name, value) tuples.

    The LAST matching entry wins; returns "" when nothing matches
    (mirrors the Delphi TStringList.Values lookup this was ported from).
    """
    found = ""
    for key, val in List:
        if key == Name:
            found = val
    return found
def SameText(str1, str2):
    """Case-insensitive equality test (Delphi SameText equivalent)."""
    return str1.lower() == str2.lower()
def IncludeTrailingPathDelimiter(aPath):
    """Return *aPath* guaranteed to end with a path separator (Delphi-style helper).

    os.path.join(path, '') appends the platform separator only when missing.
    """
    return os.path.join(aPath, '')
def NameSpaceToPath(aNameSpace):
    """Map a wiki namespace ('linux:ubuntu.12') onto a relative directory path.

    ':' separates namespace levels; dots inside a level become underscores
    because they are unwanted in directory names.
    """
    segments = [level.replace(".", "_") for level in aNameSpace.split(':')]
    return os.path.join("", *segments)
def ForceDirectories(aPath):
    """Create *aPath* and any missing parents; do nothing when it already exists."""
    if os.path.exists(aPath):
        return
    os.makedirs(aPath)
# ############################################################################################### # GENUTIL # ###############################################################################################
def SostituisciAllChar(St, Replaced, ReplacedTo):
    """Replace every occurrence of Replaced[i] in *St* with ReplacedTo[i].

    The two character sequences are expected to have the same length
    (see KInvalidUrlChar / KValidUrlChar).
    """
    result = St
    for i, old in enumerate(Replaced):
        result = result.replace(old, ReplacedTo[i])
    return result
# ############################################################################################### # PROGRAM # ###############################################################################################
def ResetAllVar ():
    """Reset the per-page parser state before converting the next page.

    BUG FIX: the original assigned plain locals, so the module-level flags
    were never actually reset; the names must be declared global here.
    OutPutWiki is deliberately NOT reset -- multi-page conversion
    accumulates output across pages.
    """
    global SlClassCSS, CodeMode, IsScript, IsNote, IsCmd, OkTrim
    SlClassCSS = []
    CodeMode = False
    IsScript = False
    IsNote = False
    IsCmd = False
    OkTrim = False
def Parse_Params ():
    """Parse the command line and return the argparse options object.

    Prints help and exits when the mandatory URL (and output folder) are
    missing.  Author names are stored base64-encoded in KAuthor/KCoAuthor.
    """
    # NOTE: the version= keyword of ArgumentParser is Python 2 argparse only.
    parser = argparse.ArgumentParser(usage = "%(prog)s [options]", version = "%(prog)s " + KVersion,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Authors: " + base64.b64decode(KAuthor + '==') + " & " + base64.b64decode(KCoAuthor) + "\nConvert HowToForge.com Artivles in Dokuwiki Articles",
        epilog="Example: \n  %(prog)s -u http://www.howtoforge.com/perfect-server-ubuntu-12.04-lts-apache2-bind-dovecot-ispconfig-3 -p7 \n  %(prog)s -u http://www.howtoforge.com/perfect-server-ubuntu-12.04-lts-apache2-bind-dovecot-ispconfig-3 -f1 -t7")
    parser.add_argument("-p", "--pagecount", dest="pagecount", default=1,
        help="Pages to download for this article [default: %(default)s]")
    parser.add_argument("-f", "--frompage", dest="frompage",
        help="Begin Page to download for this article (For Partial Download)")
    parser.add_argument("-t", "--topage", dest="topage",
        help="End Page to download for this article (For Partial Download)")
    parser.add_argument("-u", "--url", dest="url", metavar="URL",
        help="Url to convert")
    parser.add_argument("-w", "--wikiNS", dest="wikiNS",
        help="Wiki NameSpace")
    parser.add_argument("-m", "--mediaNS", dest="mediaNS",
        help="Media NameSpace")
    parser.add_argument("-c", "--creditstable", dest="creditstable", action="store_true", default=True,
        help="Include Credits & Info Table [default: %(default)s]")
    parser.add_argument("-o", "--outfolder", dest="outfolder", metavar="FOLDER",
        help="Folder to save results")
    # NOTE(review): help text looks copy-pasted from --frompage -- probably wrong.
    parser.add_argument("-n", "--flatnames", dest="useflatnames",
        help="Begin Page to download for this article (For Partial Download)")
    options = parser.parse_args()
    if (options.url == None):
        print "\nWarning: How the Hell I can convert something if you don't specify URL !!!!";
    if (options.outfolder == None) or (options.url == None):
        parser.print_help()
        exit()
    # NOTE(review): unreachable -- the branch above already exits when outfolder is None.
    if (options.outfolder == None):
        print "\nWarning: OutPut Folder not specified"; exit();
    return options
def GetTitle(Html):
    """Extract the article title from the page's <title> tag.

    HowToForge titles look like "Article Name | HowtoForge"; only the part
    before the '|' separator (and the space preceding it) is kept.

    BUG FIX: the original sliced result[:find('|')-1] unconditionally, so a
    title without '|' lost its last two characters (find returns -1).
    """
    match = re.search(KSearchBeginTitle, Html)
    beginTitle = match.start(1)
    match = re.search(KSearchEndTitle, Html)
    endTitle = match.start(1)
    # +7 skips the '<title>' tag itself.
    result = Html[beginTitle + 7:endTitle]
    sep = result.find('|')
    if sep != -1:
        result = result[:sep - 1]
    return result
def CleanHTML(Html, PassNo):
    """Cut the article body out of a full HowToForge page and wrap it in a minimal HTML shell.

    PassNo 1 (the first page) also extracts the <title> and injects it as an
    <h1>; subsequent passes skip past the repeated page-header markup instead.
    """
    result = ""
    if PassNo == 1:
        Title = GetTitle(Html)
    else:
        Title = ""
    # Locate the 'begin content' landmark (KBeginData), bounded by non-letters.
    match = re.search(r"[^a-zA-Z](" + KBeginData + ")[^a-zA-Z]", Html)
    temp = Html[match.start(1):]
    if PassNo == 1:
        # Trick: blank out the first '<p>' so find() locates the SECOND one.
        temp = temp[temp.replace('<p>', 'XXX', 1).find('<p>'):]
    else:
        # Same trick for the THIRD '</div>' (first two blanked out).
        # NOTE(review): '+5' lands on the final '>' of that </div> (6 chars),
        # keeping one character of the tag -- confirm whether +6 was intended.
        temp = temp[temp.replace('</div>', 'XXX', 2).find('</div>')+5:]
    if PassNo == 1:
        result = "<html><body><h1>" + Title +"</h1><div class=\"autoinserted\">" + temp
    else:
        result = "<html><body><div class=\"autoinserted\">" + temp
    # Truncate everything from the KEndData3 landmark onwards.
    match = re.search(r"(" + KEndData3 + ")", result)
    result = result[:match.start(1)] + "</body></html>"
    return result
def ExtractFileNameFromUrl(aUrl):
    """Return the last path segment of *aUrl* (the bare file name)."""
    return aUrl.rsplit('/', 1)[-1]
def GetFlatUrlName(aUrl):
    """Flatten 'http://host/dir/file' into a single 'host_dir_file' name.

    The scheme ('http:') and the empty segment after '//' are dropped; dots
    in intermediate segments become underscores; the file name is kept as-is.
    """
    tokens = aUrl.split('/')[2:]   # skip 'http:' and the empty '//' segment
    file_name = tokens.pop()
    prefix = ''.join(t.replace('.', '_') + '_' for t in tokens)
    return prefix + file_name
def GetPageOutputDir(aWikiNameSpace, aOutPutFolder):
    """Build the output directory for wiki pages: base folder + sanitized namespace path."""
    sub_path = ""
    if aWikiNameSpace is not None:
        sub_path = NameSpaceToPath(aWikiNameSpace)
        # Map characters that are invalid in page paths to underscores.
        sub_path = SostituisciAllChar(sub_path, KInvalidUrlChar, KValidUrlChar)
    return IncludeTrailingPathDelimiter(IncludeTrailingPathDelimiter(aOutPutFolder) + sub_path)
def GetImageOutputDir(aWikiMediaNameSpace, aWikiNameSpace, aOutPutFolder):
    """Return the directory where downloaded images go: page dir + media namespace path."""
    media_sub = ""
    if aWikiMediaNameSpace is not None:
        media_sub = NameSpaceToPath(aWikiMediaNameSpace)
    page_dir = GetPageOutputDir(aWikiNameSpace, aOutPutFolder)
    return IncludeTrailingPathDelimiter(IncludeTrailingPathDelimiter(page_dir) + media_sub)
def GetImageName(aImgUrl, aWikiMediaNameSpace, aWikiNameSpace, aOutPutFolder, aUseFlatUrls):
    """Build the local path an image URL will be saved under.

    With aUseFlatUrls the whole URL is flattened into the file name,
    otherwise only the URL's last segment is used.
    """
    base_dir = GetImageOutputDir(aWikiMediaNameSpace, aWikiNameSpace, aOutPutFolder)
    if aUseFlatUrls:
        return base_dir + GetFlatUrlName(aImgUrl)
    return base_dir + ExtractFileNameFromUrl(aImgUrl)
def combOutput(aFile):
    """Remove blank (whitespace-only) lines from *aFile*, rewriting it in place.

    Used as a post-processing step on the generated wiki text, which
    accumulates many empty lines during conversion.

    Improvements over the original: context managers instead of explicit
    close / the Python-2-only file() builtin, a comprehension instead of the
    append loop, and a single write instead of a char-by-char loop.
    """
    with open(aFile) as fd:
        contents = fd.readlines()
    # Keep only lines that carry non-whitespace content.
    new_contents = [line for line in contents if line.strip()]
    print("Comb File: " + aFile)
    with open(aFile, 'w') as fo:
        fo.write("".join(new_contents))
def SaveToFile(WikiText, aWikiNameSpace, aOutPutFolder, aUrl):
    """Write the converted wiki text under the namespace folder, then strip blank lines.

    WikiText may be a list of fragments or a single string -- the write loop
    iterates either one.  The URL is flattened into the output file name.
    """
    MyDir = GetPageOutputDir(aWikiNameSpace, aOutPutFolder)
    ForceDirectories(MyDir)
    # Dots in the flattened URL become '_' so '.txt' is the only extension.
    OutFileName = MyDir + GetFlatUrlName(aUrl).replace('.', '_') + '.txt'
    print "Saving File: " + OutFileName
    fo = file(OutFileName, 'w')
    for x in WikiText:
        fo.write(x)
    fo.close()
    # Post-process: remove the empty lines produced during conversion.
    combOutput(OutFileName)
def DownloadImage(Url, SaveName):
downloaddir = os.path.dirname(SaveName) ForceDirectories(downloaddir)
result = True
webFile = urllib2.urlopen(Url)
data = webFile.read()
try:
fileName = response.info()['content-disposition'].split('filename="')[1].split('"')[0]
except:
fileName = ExtractFileNameFromUrl(Url)
localFile = os.path.join(downloaddir, fileName.replace('%20','_'))
print "Download Image: ", localFile
with open(localFile, "wb") as image:
image.write(data)
image.close()
webFile.close()
return result
def CustomAdjustLine(aLine):
    """Apply ad-hoc text fixups that the tag-level conversion cannot express.

    The replacements run in order: placeholder vhost tags, stray spaces
    before '//' line-continuations, '<--' arrows, and spaces before '\\'.
    """
    fixups = (
        ("<vhost>", "[vhost]"),
        (" //", "//"),
        ('<--', '<-'),
        (' \\', '\\'),
    )
    result = aLine
    for old, new in fixups:
        result = result.replace(old, new)
    return result
def CustomAdjustments(aData):
    """Join all accumulated wiki fragments and apply the ad-hoc text fixups.

    Note: takes a list of fragments but returns ONE string -- callers must
    not keep treating the result as a list.
    """
    print "Custom Adjustments begin"
    result = CustomAdjustLine(''.join(aData))
    print "Custom Adjustments end"
    return result
class MyCreditsParser(HTMLParser):
    """Scrape author link and submission date from the article's credits block.

    Rows of a DokuWiki table are appended to the module-level CreditsString;
    the canWork flag restricts parsing to the <div class="info"> region.
    """
    def handle_starttag(self, tag, attrs):
        global CreditsString
        global canWork
        # Only the <div class="info"> block contains the credits.
        if SameText(tag, 'div') and SameText(valueOf(attrs, 'class'), 'info'):
            canWork = True
        # Author profile link: open a table row with the author's URL.
        if SameText(tag, 'a') and SameText(valueOf(attrs, 'title'), 'View user profile.'):
            CreditsString = CreditsString + '^ Autore | [[' + KBaseUrl + valueOf(attrs, 'href') + '|'
    def handle_endtag(self, tag):
        global CreditsString
        global canWork
        global PassCount
        if SameText(tag, 'div'):
            canWork = False
        if SameText(tag, 'a'):
            # Close the author link only once (first anchor inside the info div).
            if (canWork) and (PassCount <1):
                CreditsString = CreditsString + ']] | \n'
                PassCount = PassCount + 1
    def handle_data(self, data):
        global CreditsString
        global canWork
        if (canWork):
            # Skip the boilerplate words surrounding the author name and links.
            if (data.strip() != 'Submitted by') and (data.strip() != '(') and (data.strip() != ')') and (data.strip() != 'Forums') and (data.strip() !=') (') and (data.strip() !='Contact Author'):
                match = re.search("(on (Mon|Tue|Wed|Thu|Fri|Sat|Sun), \d{4}-\d{2}-\d{2})", data)
                # NOTE(review): match.groups() > 0 compares a tuple with an int
                # (always True on Python 2); 'if match:' alone would suffice.
                if match and (match.groups() > 0):
                    # Slice off the leading 'on ' and drop the comma from the date.
                    CreditsString = CreditsString + '^ Ultima Modifica | ' + data[match.start(0)+3:match.end(0)].replace(',', '') + ' | \n'
                    canWork = False
                else:
                    CreditsString = CreditsString + data.strip()
class MyHTMLParser(HTMLParser):
    """Translate the cleaned article HTML into DokuWiki markup.

    Output fragments are appended to the module-level OutPutWiki list.
    SlClassCSS is a stack of (tag, css-class) pairs pushed in
    handle_starttag and popped in handle_endtag, so the closing markup can
    depend on the css class of the matching opening tag.
    """
    def handle_starttag(self, tag, attrs):
        global SlClassCSS
        global CodeMode
        global IsScript
        global OutPutWiki
        global IsNote
        global OkTrim
        # <p class="command"> -> code block (shell commands)
        if SameText(tag, 'p') and SameText(valueOf(attrs, 'class'), 'command'):
            OutPutWiki.append('\n' + KBeginCode + '\n')
            CodeMode = True
            OkTrim = True
        # <p class="system"> -> italic paragraph (system output)
        elif SameText(tag, 'p') and SameText(valueOf(attrs, 'class'), 'system'):
            OutPutWiki.append(KParagraph + KItalic)
            OkTrim = True
        # <p class="highlight"> -> bold italic monospace
        elif SameText(tag, 'p') and SameText(valueOf(attrs, 'class'), 'highlight'):
            OutPutWiki.append(KParagraph + KBold + KItalic + KCodeStr)
            OkTrim = True
        # plain <p> -> paragraph break
        elif SameText(tag, 'p') and (valueOf(attrs, 'class') == ""):
            OutPutWiki.append('\n' + KParagraph)
        # sponsor box -> wiki note block
        elif SameText(tag, 'div') and SameText(valueOf(attrs, 'class'), 'sponsor'):
            OutPutWiki.append('\n' + KBeginNote + '\n')
            IsNote = True
        # headings h1..h5 -> wiki heading markup
        elif SameText(tag, 'h1'):
            OutPutWiki.append('\n' + KBeginH1)
        elif SameText(tag, 'h2'):
            OutPutWiki.append('\n' + KBeginH2)
        elif SameText(tag, 'h3'):
            OutPutWiki.append('\n' + KBeginH3)
        elif SameText(tag, 'h4'):
            # inside a note box an h4 is rendered as plain bold text
            if IsNote:
                OutPutWiki.append('\n' + KBold)
            else:
                OutPutWiki.append('\n' + KBeginH4)
        elif SameText(tag, 'h5'):
            OutPutWiki.append('\n' + KBeginH5)
        elif SameText(tag, 'span') and SameText(valueOf(attrs, 'class'), 'system'):
            OutPutWiki.append(KItalic + KCodeStr)
        elif SameText(tag, 'span') and SameText(valueOf(attrs, 'class'), 'highlight'):
            OutPutWiki.append(KBold + KItalic + KCodeStr)
        # all list items become unordered wiki list entries
        elif SameText(tag, 'li'):
            OutPutWiki.append('\n' + KSymbolList)
        elif SameText(tag, 'br'):
            if CodeMode:
                OutPutWiki.append('\n')
            else:
                OutPutWiki.append(KParagraph + '\n')
        elif SameText(tag, 'b') or SameText(tag, 'strong'):
            OutPutWiki.append(KBold)
        elif SameText(tag, 'i') or SameText(tag, 'em'):
            OutPutWiki.append(KItalic)
        # <pre> -> file block (configuration file contents)
        elif SameText(tag, 'pre'):
            OutPutWiki.append('\n' + KBeginFile + '\n')
            CodeMode = True
        # data inside <script>/<noscript> is suppressed via IsScript
        elif SameText(tag, 'script'):
            IsScript = True
        elif SameText(tag, 'noscript'):
            IsScript = True
        # anchors (except image-lightbox 'thickbox' links) -> [[href|text]]
        elif SameText(tag, 'a') and not SameText(valueOf(attrs, 'class'), 'thickbox'):
            OutPutWiki.append(KBeginLink + valueOf(attrs, 'href') + '|')
        elif SameText(tag, 'img'):
            # Thumbnails ('Click to enlarge') are skipped; full images are
            # downloaded and replaced with wiki image markup.
            # MyOptions is the global options object created in __main__.
            if not SameText(valueOf(attrs, 'alt'), 'Click to enlarge'):
                if DownloadImage(valueOf(attrs, 'src'), GetImageName(valueOf(attrs, 'src'), MyOptions.mediaNS, MyOptions.wikiNS, MyOptions.outfolder, MyOptions.useflatnames)):
                    MyNS = MyOptions.mediaNS
                    if MyNS == None:
                        MyNS = ""
                    else:
                        MyNS = ":" + MyNS + ":"
                    # NOTE(review): KEndImage is missing from the (garbled)
                    # constants section of this file -- confirm it is defined.
                    if MyOptions.useflatnames:
                        ImgWikiTag = '\n' + KBeginImage + MyNS + GetFlatUrlName(valueOf(attrs, 'src')) + '|' + KEndImage + '\n'
                        OutPutWiki.append(ImgWikiTag)
                    else:
                        ImgWikiTag = '\n' + KBeginImage + MyNS + ExtractFileNameFromUrl(valueOf(attrs, 'src')) + '|' + KEndImage + '\n'
                        OutPutWiki.append(ImgWikiTag)
                else:
                    OutPutWiki.append('\n' + ' #IMG:' + valueOf(attrs, 'src') + ' (Failed to Download)' + '\n')
        # push (tag, class) so handle_endtag can see the opening tag's class
        if (not SameText(tag, 'br')) and (not SameText(tag, 'img')):
            SlClassCSS.insert(0, (tag, valueOf(attrs, 'class')));
    def handle_endtag(self, tag):
        global SlClassCSS
        global CodeMode
        global IsScript
        global OutPutWiki
        global IsNote
        global OkTrim
        # Closing markup mirrors handle_starttag, keyed on the css class
        # remembered on top of the SlClassCSS stack.
        if SameText(tag, 'p') and SameText(SlClassCSS[0][1], 'command'):
            OutPutWiki.append('\n' + KEndCode + '\n')
            CodeMode = False
            OkTrim = False
        elif SameText(tag, 'p') and SameText(SlClassCSS[0][1], 'highlight'):
            OutPutWiki.append(KCodeStr + KItalic + KBold + KParagraph)
            OkTrim = False
        elif SameText(tag, 'p') and SameText(SlClassCSS[0][1], 'system'):
            OutPutWiki.append(KItalic + KParagraph)
            OkTrim = False
        elif SameText(tag, 'p'):
            OutPutWiki.append(KParagraph + '\n')
        elif SameText(tag, 'div') and SameText(SlClassCSS[0][1], 'sponsor'):
            OutPutWiki.append('\n' + KEndNote + '\n')
            IsNote = False
        elif SameText(tag, 'h1'):
            OutPutWiki.append(KEndH1 +'\n')
        elif SameText(tag, 'h2'):
            OutPutWiki.append(KEndH2 +'\n')
        elif SameText(tag, 'h3'):
            OutPutWiki.append(KEndH3 +'\n')
        elif SameText(tag, 'h4'):
            if IsNote:
                # close the bold h4-inside-note and draw a separator line
                OutPutWiki.append(KBold + '\n' + '----' + '\n')
            else:
                OutPutWiki.append(KEndH4 +'\n')
        elif SameText(tag, 'h5'):
            OutPutWiki.append(KEndH5 +'\n')
        elif SameText(tag, 'span') and SameText(SlClassCSS[0][1], 'system'):
            OutPutWiki.append(KCodeStr + KItalic)
        elif SameText(tag, 'span') and SameText(SlClassCSS[0][1], 'highlight'):
            OutPutWiki.append(KCodeStr + KItalic + KBold)
        elif SameText(tag, 'li'):
            OutPutWiki.append('\n')
        elif SameText(tag, 'b') or SameText(tag, 'strong'):
            OutPutWiki.append(KBold + ' ')
        elif SameText(tag, 'i') or SameText(tag, 'em'):
            OutPutWiki.append(KItalic)
        elif SameText(tag, 'pre'):
            OutPutWiki.append('\n' + KEndFile + '\n')
            CodeMode = False
        elif SameText(tag, 'script'):
            IsScript = False
        elif SameText(tag, 'noscript'):
            IsScript = False
        elif SameText(tag, 'a') and not SameText(SlClassCSS[0][1], 'thickbox'):
            OutPutWiki.append(KEndLink + ' ')
        # pop the stack entry pushed by the matching start tag
        if (not SameText(tag, 'br')) and (not SameText(tag, 'img')):
            if SameText(tag, SlClassCSS[0][0]):
                del SlClassCSS[0]
    def handle_entityref(self, name):
        # &gt;/&lt; are emitted literally; other entities are decoded to UTF-8.
        if name == 'gt':
            OutPutWiki.append('>')
        elif name == 'lt':
            OutPutWiki.append('<')
        else:
            OutPutWiki.append(unichr(name2codepoint[name]).encode('utf8'))
    def handle_data(self, data):
        global CodeMode
        global IsScript
        global OkTrim
        # Drop script contents entirely; trim whitespace inside code/command blocks.
        if (not IsScript):
            if (CodeMode) or (OkTrim):
                OutPutWiki.append(data.strip())
            else:
                OutPutWiki.append(data)
def getbaseurl(aUrl):
    """Strip a trailing page suffix like '-p3' from an article URL.

    'http://.../article-p3' -> 'http://.../article'; a URL without the
    suffix is returned unchanged.

    Improvement: the original drove control flow through a bare 'except:'
    around match.start() on a possibly-None match, which would also swallow
    unrelated errors; an explicit None check is used instead.
    """
    match = re.search(r"(-p[0-9])", aUrl)
    if match:
        return aUrl[:match.start(1)]
    return aUrl
def getutlforpage(aBaseUrl, aPageNo):
    """Return the URL of page *aPageNo* of an article.

    Page 1 is the bare base URL; later pages get a '-pN' suffix.
    """
    base = getbaseurl(aBaseUrl)
    return base if aPageNo == 1 else base + "-p" + str(aPageNo)
def getCredits(aCreditsTable):
    """Render the credits/revisions section: heading, credits table rows,
    and one table row per converted page (from the global PagesList)."""
    heading = KBeginH2 + KCreditsHeader + KEndH2 + "\n\n"
    table = heading + "^ " + KCreditsHeader +" ^^ \n" + aCreditsTable + "^ Pagine ^^ \n"
    page_rows = "".join("| [[" + page + "]] || \n" for page in PagesList)
    return table + page_rows
def ConvertPage(aUrl, aPassNo):
    """Download one article page and feed it through the wiki converter.

    On the first call (CreditsInfo still empty) the credits table is also
    extracted, before the credits markup gets stripped by CleanHTML.
    Output accumulates in the global OutPutWiki via MyHTMLParser.
    """
    global CreditsInfo
    print "Converting page: ", aUrl
    # Track Pages
    PagesList.append(aUrl)
    # Reset Vars
    ResetAllVar()
    # Download HTML
    usock = urllib2.urlopen(aUrl)
    data = usock.read()
    usock.close()
    # Before the credits data is stripped from the HTML ...
    if CreditsInfo == []:
        CreditsParser = MyCreditsParser()
        CreditsParser.feed(data)
        # Build the Revisions / Credits table
        CreditsInfo = []
        CreditsInfo.append(getCredits(CreditsString))
    # Clean HTML
    MyHtml = CleanHTML(data, aPassNo)
    #SaveToFile(MyHtml, MyOptions.wikiNS, MyOptions.outfolder, aUrl + "_" + str(aPassNo))
    # Parse HTML
    parser = MyHTMLParser()
    parser.feed(MyHtml)
def ConvertSinglePage(aUrl):
    """Convert a one-page article and save it as a single wiki file."""
    global OutPutWiki
    ConvertPage(aUrl, 1)
    # "Special" conversions -- CustomAdjustments joins the fragment list
    # into ONE string, so OutPutWiki is a string from here on.
    OutPutWiki = CustomAdjustments(OutPutWiki)
    # Append the Credits table at the bottom of the article.
    # BUG FIX: the original called OutPutWiki.append(...) here, but strings
    # have no append -- use concatenation, as ConvertMultiPage already does.
    if MyOptions.creditstable:
        OutPutWiki = OutPutWiki + '\n'
        OutPutWiki = OutPutWiki + ''.join(CreditsInfo)
    # Save to File
    SaveToFile(OutPutWiki, MyOptions.wikiNS, MyOptions.outfolder, aUrl)
def ConvertMultiPage(aUrl, aBeginPage, aEndPage):
    """Convert pages aBeginPage..aEndPage of an article into one wiki file.

    NOTE(review): the aUrl parameter is unused -- the global MyOptions.url
    is read instead; confirm before relying on aUrl.
    """
    global OutPutWiki
    for idx in range(aBeginPage, aEndPage + 1):
        ConvertPage(getutlforpage(MyOptions.url, idx), idx)
    # "Special" conversions (this turns OutPutWiki into a single string)
    OutPutWiki = CustomAdjustments(OutPutWiki)
    # Append the Credits table at the bottom of the article
    if MyOptions.creditstable:
        OutPutWiki = OutPutWiki + '\n'
        OutPutWiki = OutPutWiki + ''.join(CreditsInfo)
    # Save to File
    SaveToFile(OutPutWiki, MyOptions.wikiNS, MyOptions.outfolder, getutlforpage(MyOptions.url, 1))
if name == “main”:
MyOptions = Parse_Params()
if (MyOptions.wikiNS != None):
MyOptions.wikiNS = MyOptions.wikiNS.replace('.', '_')
if (MyOptions.mediaNS != None):
MyOptions.mediaNS = MyOptions.mediaNS.replace('.', '_')
print os.path.basename(sys.argv[0]) + " By " + base64.b64decode(KAuthor + '==') + " & " + base64.b64decode(KCoAuthor) print "Begin Convert"
if (MyOptions.frompage != None):
BeginPage = int(MyOptions.frompage)
if (MyOptions.topage != None):
EndPage = int(MyOptions.topage)
if (BeginPage != -1) and (EndPage == -1):
EndPage = BeginPage
if (BeginPage == -1) and (EndPage != -1):
BeginPage = EndPage
if (BeginPage == -1) and (EndPage == -1):
if int(MyOptions.pagecount) == 0:
ConvertSinglePage(MyOptions.url)
else:
BeginPage = 1
EndPage = int(MyOptions.pagecount)
if (BeginPage == EndPage):
ConvertSinglePage(MyOptions.url)
else:
ConvertMultiPage(MyOptions.url, BeginPage, EndPage)
print "Convert done"
</code>
Per testare il software mi sono fatto un piccolo batch
rem HTF2Wiki.py -u http://www.howtoforge.com/perfect-server-ubuntu-12.10-apache2-bind-dovecot-ispconfig-3-p2 -p7 -o .\Test -m linux:perfect_server_ubuntu_12.10
HTF2Wiki.py -u http://www.howtoforge.com/perfect-server-ubuntu-12.10-apache2-bind-dovecot-ispconfig-3-p2 -f1 -t7 -o .\Test -m linux:perfect_server_ubuntu_12.10
Le 2 righe sono equivalenti: partono dalla prima pagina dell'articolo che si trova a quell'indirizzo e scaricano tutte e 7 le pagine dell'articolo in un unico file .txt, scaricando anche le immagini.
Allego anche il sorgente
Un esempio di download con la progressbar
import urllib2 url = "http://download.thinkbroadband.com/10MB.zip" file_name = url.split('/')[-1] u = urllib2.urlopen(url) f = open(file_name, 'wb') meta = u.info() file_size = int(meta.getheaders("Content-Length")[0]) print "Downloading: %s Bytes: %s" % (file_name, file_size) file_size_dl = 0 block_sz = 8192 while True: buffer = u.read(block_sz) if not buffer: break file_size_dl += len(buffer) f.write(buffer) status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size) status = status + chr(8)*(len(status)+1) print status, f.close()