Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#!/usr/local/bin/python # encoding: utf-8 Documentation for polyglot can be found here: http://pypolyglot.readthedocs.org/en/stable
Translate documents and webpages to various markup languages and document formats (html, epub, mobi ..)
Usage: polyglot init polyglot [-oc] (pdf|html|epub|mobi) <url> [<destinationFolder> -f <filename> -s <pathToSettingsFile>] polyglot kindle <url> [-f <filename> -s <pathToSettingsFile>] polyglot [-o] (epub|mobi) <docx> [<destinationFolder> -f <filename> -s <pathToSettingsFile>] polyglot kindle <docx> [-f <filename> -s <pathToSettingsFile>] polyglot [-o] kindleNB2MD <notebook> [<destinationFolder> -s <pathToSettingsFile>]
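Options: init setup the polyglot settings file for the first time pdf print webpage to pdf html parse and download webpage to a local HTML document epub generate an epub format book from a webpage URL or a DOCX file mobi generate a mobi format book from a webpage URL or a DOCX file kindle send a webpage article or a DOCX file straight to kindle kindleNB2MD convert a kindle notebook export to a markdown document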
-h, --help show this help message -v, --version show version -o, --open open the document after creation -c, --clean add polyglot's clean styling to the output document <url> the url of the article's webpage <docx> path to a DOCX file -s <pathToSettingsFile>, --settings <pathToSettingsFile> path to alternative settings file (optional) <destinationFolder> the folder to save the parsed PDF or HTML document to (optional) -f <filename>, --filename <filename> the name of the file to save, otherwise use webpage title as filename (optional) """ ################# GLOBAL IMPORTS #################### # from ..__init__ import *
""" *The main function used when ``cl_utils.py`` is run as a single script from the cl, or when installed as a cl command* """ # setup the command-line util settings su = tools( arguments=arguments, docString=__doc__, logLevel="WARNING", options_first=False, projectName="polyglot" ) arguments, settings, log, dbConn = su.setup()
# Unpack the remaining command-line arguments into local variables using
# `exec`, so that the variable names are derived from the argument keys:
# keys starting with "-" lose their dashes and gain a "Flag" suffix
# (`--open` -> `openFlag`); other keys only lose their angle brackets
# (`<url>` -> `url`).
for arg, val in arguments.iteritems():
    if arg[0] == "-":
        varname = arg.replace("-", "") + "Flag"
    else:
        varname = arg.replace("<", "").replace(">", "")
    # Bind the value by reference instead of interpolating its text into the
    # executed source: a value containing a quote or backslash (a URL, a
    # filename) can then neither break the statement nor inject code.
    exec(varname + " = val")
    if arg == "--dbConn":
        dbConn = val
    log.debug('%s = %s' % (varname, val,))
## START LOGGING ##
# Record the start time; it is needed again when the runtime is reported.
startTime = times.get_now_sql_datetime()
startMessage = '--- STARTING TO RUN THE cl_utils.py AT %s' % (startTime,)
log.info(startMessage)
# for k, v in locals().iteritems(): # print k, v
# Save into the current working directory when no destination was given.
destinationFolder = destinationFolder or os.getcwd()
# Collapse any falsy filename (None, empty string) to a plain False.
filenameFlag = filenameFlag or False
# The clean flag switches the readability option on or off.
readability = bool(cleanFlag)
if init:
    # Open the user's polyglot settings file with the platform's default
    # application so that it can be edited.
    import sys
    from os.path import expanduser
    home = expanduser("~")
    filepath = home + "/.config/polyglot/polyglot.yaml"
    try:
        if sys.platform.startswith("win"):
            # `start` is a cmd.exe builtin, so it needs the shell; the empty
            # "" is the window title, which allows the path to be quoted.
            Popen('start "" "%s"' % (filepath,),
                  stdout=PIPE, stderr=PIPE, shell=True)
        else:
            # An argument list bypasses the shell, so a path containing
            # spaces or shell metacharacters is passed through intact.
            Popen(["open", filepath], stdout=PIPE, stderr=PIPE)
    except OSError as e:
        # Opening the file is best effort: report the failure, carry on.
        log.warning('could not open %s: %s' % (filepath, e))
if pdf and url:
    # Print the webpage to a PDF in the destination folder and keep the
    # value returned by `get()` as the document path.
    pdfPrinter = printpdf.printpdf(
        log=log,
        settings=settings,
        url=url,
        folderpath=destinationFolder,
        title=filenameFlag,
        append=False,
        readability=readability
    )
    filepath = pdfPrinter.get()
if html and url:
    # Parse the webpage and write it out as a local HTML document.
    pageCleaner = htmlCleaner.htmlCleaner(
        log=log,
        settings=settings,
        url=url,
        outputDirectory=destinationFolder,
        # set to False to use the webpage title
        title=filenameFlag,
        # add polyglot's styling to the HTML document
        style=cleanFlag,
        # include metadata in the generated HTML (e.g. title)
        metadata=True,
        # include the title as an H1 at the top of the document
        h1=True
    )
    filepath = pageCleaner.clean()
if epub:
    from polyglot import ebook
    # A URL takes precedence; otherwise the DOCX path is the source.
    source = url or docx
    epubBook = ebook(
        log=log,
        settings=settings,
        urlOrPath=source,
        title=filenameFlag,
        bookFormat="epub",
        outputDirectory=destinationFolder
    )
    filepath = epubBook.get()
if mobi:
    from polyglot import ebook
    # A URL takes precedence; otherwise the DOCX path is the source.
    source = url or docx
    mobiBook = ebook(
        log=log,
        settings=settings,
        urlOrPath=source,
        title=filenameFlag,
        bookFormat="mobi",
        outputDirectory=destinationFolder,
    )
    filepath = mobiBook.get()
if kindle:
    from polyglot import kindle
    # A URL takes precedence; otherwise the DOCX path is the source.
    source = url or docx
    kindleSender = kindle(
        log=log,
        settings=settings,
        urlOrPath=source,
        title=filenameFlag
    )
    success = kindleSender.send()
if kindleNB2MD:
    # Convert a kindle notebook export into a markdown file named after the
    # notebook, with its extension swapped for ".md".
    filenameNoExtension = os.path.splitext(os.path.basename(notebook))[0]
    if destinationFolder:
        filepath = os.path.join(
            destinationFolder, filenameNoExtension + ".md")
    else:
        # `splitext` keeps the leading dot on the extension, so build the
        # path from the stem: prefixing another "." and calling `replace`
        # never matched and left the output path equal to the input.
        filepath = os.path.splitext(notebook)[0] + ".md"
    from polyglot.markdown import kindle_notebook
    nb = kindle_notebook(
        log=log,
        kindleExportPath=notebook,
        outputPath=filepath
    )
    nb.convert()
if openFlag:
    # Open the generated document with the platform's default application.
    import sys
    # `filepath` is only bound by the commands that produce a document, so
    # look it up defensively instead of assuming it exists.
    target = locals().get("filepath")
    if not target:
        log.warning('no generated document to open')
    else:
        try:
            if sys.platform.startswith("win"):
                # `start` is a cmd.exe builtin, so it needs the shell; the
                # empty "" is the window title, which allows a quoted path.
                Popen('start "" "%s"' % (target,),
                      stdout=PIPE, stderr=PIPE, shell=True)
            else:
                # An argument list bypasses the shell, so a path containing
                # spaces or shell metacharacters is passed through intact.
                Popen(["open", target], stdout=PIPE, stderr=PIPE)
        except OSError as e:
            # Opening the file is best effort: report the failure, carry on.
            log.warning('could not open %s: %s' % (target, e))
if "dbConn" in locals() and dbConn: dbConn.commit() dbConn.close() ## FINISH LOGGING ## endTime = times.get_now_sql_datetime() runningTime = times.calculate_time_difference(startTime, endTime) log.info('-- FINISHED ATTEMPT TO RUN THE cl_utils.py AT %s (RUNTIME: %s) --' % (endTime, runningTime, ))
return
# Run the command-line entry point.
main()