Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Download ATels as Raw HTML files*
6:Author:
7 David Young
8"""
9from __future__ import print_function
10from builtins import range
11from builtins import object
12import sys
13import os
14os.environ['TERM'] = 'vt100'
15from fundamentals import tools
16import requests
17import re
18import random
19from time import sleep
20import codecs
class download(object):
    """
    *Download ATels as Raw HTML files*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary (the ``atel-directory`` key is read to locate the folder of downloaded ATel HTML files)

    **Usage**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

    To initiate a download object, use the following:

    ```python
    from atelParser import download
    atels = download(
        log=log,
        settings=settings
    )
    ```
    """

    # Request header sent with every HTTP request made by this class
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    _RSS_URL = "http://www.astronomerstelegram.org/?rss"
    _ATEL_URL = 'http://www.astronomerstelegram.org/?read=%(atel)s'
    # Matches the ATel links reported in the RSS feed; group 1 is the ATel number
    _ATEL_LINK_REGEX = re.compile(r'astronomerstelegram.org\/\?read\=(\d+)')
    # Matches the names of ATel files on disk, e.g. ``00012345.html``
    _ATEL_FILE_REGEX = re.compile(r'^(\d+)\.html$')

    # Initialisation
    def __init__(
            self,
            log,
            settings=False,
    ):
        self.log = log
        log.debug("instansiating a new 'download' object")
        self.settings = settings
        # Upper bound (in seconds) of the random wait before each ATel download
        self.maxsleep = 180

        return None

    @staticmethod
    def _latest_number_in_feed(content):
        """*return the highest ATel number linked to within the given RSS feed text*

        **Key Arguments**

        - ``content`` -- the text of the RSS feed

        **Return**

        - ``number`` -- the highest ATel number found (integer)

        Raises ``IndexError`` if the text contains no ATel links; this is the
        exception type previously raised in this situation.
        """
        matches = download._ATEL_LINK_REGEX.findall(content)
        if not matches:
            raise IndexError("no ATel links found in the RSS feed content")
        # COMPARE AS INTEGERS: A STRING SORT WOULD RANK '9999' ABOVE '10000'
        return max(int(m) for m in matches)

    @staticmethod
    def _downloaded_atel_numbers(basePath):
        """*return the set of ATel numbers that already have an HTML file in the given directory*

        **Key Arguments**

        - ``basePath`` -- path to the directory containing the ATel HTML files

        **Return**

        - ``numbers`` -- a set of integers, one per ``<digits>.html`` file found

        Directories and files whose names are not ``<digits>.html`` are ignored.
        """
        numbers = set()
        for name in os.listdir(basePath):
            matched = download._ATEL_FILE_REGEX.match(name)
            if matched and os.path.isfile(os.path.join(basePath, name)):
                numbers.add(int(matched.group(1)))
        return numbers

    def get_latest_atel_number(
            self):
        """*get latest atel number by parsing the RSS feed for the ATel site*

        **Return**

        - ``number`` -- the number of the latest ATel

        If the HTTP request fails a message is printed and the process exits.

        **Usage**

        ```python
        from atelParser import download
        atels = download(
            log=log,
            settings=settings
        )
        latestNumber = atels.get_latest_atel_number()
        ```
        """
        self.log.debug('starting the ``get_latest_atel_number`` method')

        # DOWNLOAD THE RSS FEED FOR ATELS
        try:
            response = requests.get(
                url=self._RSS_URL,
                headers=self._HEADERS,
            )
        except requests.exceptions.RequestException:
            print('HTTP Request failed')
            # NOTE(review): exits with status 0 although the request failed;
            # kept unchanged as callers may rely on it
            sys.exit(0)

        # DECODE THE BYTES (RATHER THAN str(bytes), WHICH GIVES "b'...'" ON
        # PYTHON 3) AND FIND THE HIGHEST ATEL NUMBER REPORTED
        content = response.content.decode("utf-8", "replace")
        number = self._latest_number_in_feed(content)

        self.log.debug('completed the ``get_latest_atel_number`` method')
        return number

    def get_list_of_atels_still_to_download(
            self):
        """*get list of atels still to download by determining which ATels have been downloaded and diffing this against the latest ATel number*

        **Return**

        - ``atelNumbersToDownload`` -- a list of the ATel numbers that need downloaded (ascending order)

        **Usage**

        ```python
        from atelParser import download
        atels = download(
            log=log,
            settings=settings
        )
        atelsToDownload = atels.get_list_of_atels_still_to_download()
        ```
        """
        self.log.debug(
            'starting the ``get_list_of_atels_still_to_download`` method')

        basePath = self.settings["atel-directory"]
        # A SET GIVES CONSTANT-TIME MEMBERSHIP TESTS IN THE DIFF BELOW
        atelDownloaded = self._downloaded_atel_numbers(basePath)

        latestNumber = self.get_latest_atel_number()

        atelNumbersToDownload = [
            m for m in range(1, latestNumber + 1) if m not in atelDownloaded]

        self.log.debug(
            'completed the ``get_list_of_atels_still_to_download`` method')
        return atelNumbersToDownload

    def download_list_of_atels(
            self,
            atelNumbers):
        """*download the HTML files of all the missing ATels*

        Each ATel is requested after a random wait of between 1 and
        ``self.maxsleep`` seconds and written to
        ``<atel-directory>/<8-digit zero-padded number>.html``. An ATel is
        skipped (and a warning logged) if the server does not answer with HTTP
        status 200 or if the page is not valid UTF-8, so that no stub file is
        left behind to be mistaken for a completed download.

        **Key Arguments**

        - ``atelNumbers`` -- the list of ATel numbers to download

        Raises ``IOError`` if an output file cannot be opened.

        **Usage**

        To download new and missing ATel to your ``atel-directory`` use this code:

        ```python
        from atelParser import download
        atels = download(
            log=log,
            settings=settings
        )
        atelsToDownload = atels.get_list_of_atels_still_to_download()
        atels.download_list_of_atels(atelsToDownload)
        ```
        """
        self.log.debug('starting the ``download_list_of_atels`` method')

        for atel in atelNumbers:
            wait = random.randint(1, self.maxsleep)
            print(
                "Waiting for a randomly selected %(wait)ss before downloading ATel #%(atel)s" % {
                    "wait": wait, "atel": atel})
            sleep(wait)

            url = self._ATEL_URL % {"atel": atel}
            response = requests.get(url, headers=self._HEADERS)

            # DO NOT SAVE ERROR PAGES; THEY WOULD NEVER BE RE-DOWNLOADED
            if response.status_code != 200:
                self.log.warning(
                    'skipping ATel #%s, the server returned HTTP status %s' %
                    (atel, response.status_code))
                continue

            # DECODE BEFORE OPENING THE FILE SO A FAILURE LEAVES NOTHING ON DISK
            try:
                html = response.content.decode("utf8")
            except UnicodeDecodeError:
                self.log.warning(
                    'skipping ATel #%s, the page could not be decoded as UTF-8' %
                    (atel,))
                continue

            pathToWriteFile = os.path.join(
                self.settings["atel-directory"], "%08d.html" % (atel,))
            try:
                self.log.debug("attempting to open the file %s" %
                               (pathToWriteFile,))
                writeFile = codecs.open(
                    pathToWriteFile, encoding='utf-8', mode='w')
            except IOError:
                message = 'could not open the file %s' % (pathToWriteFile,)
                self.log.critical(message)
                raise IOError(message)

            # THE CONTEXT MANAGER CLOSES THE FILE EVEN IF THE WRITE FAILS
            with writeFile:
                writeFile.write(html)

        self.log.debug('completed the ``download_list_of_atels`` method')
        return None