Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Download ATels as Raw HTML files* 

5 

6:Author: 

7 David Young 

8""" 

9from __future__ import print_function 

10from builtins import range 

11from builtins import object 

12import sys 

13import os 

14os.environ['TERM'] = 'vt100' 

15from fundamentals import tools 

16import requests 

17import re 

18import random 

19from time import sleep 

20import codecs 

21 

22 

class download(object):
    """
    *Download ATels as Raw HTML files*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary (the ``atel-directory`` key is read to locate the folder of downloaded ATels)

    **Usage**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

    To initiate a download object, use the following:

    ```python
    from atelParser import download
    atels = download(
        log=log,
        settings=settings
    )
    ```
    """

    # HEADERS SENT WITH EVERY REQUEST MADE TO THE ATEL SITE
    _requestHeaders = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # MATCHES LINKS TO INDIVIDUAL ATELS, CAPTURING THE ATEL NUMBER
    _atelLinkRegex = re.compile(r'astronomerstelegram.org\/\?read\=(\d+)')

    # Initialisation

    def __init__(
            self,
            log,
            settings=False,

    ):
        self.log = log
        log.debug("instansiating a new 'download' object")
        self.settings = settings
        # UPPER LIMIT (SECONDS) OF THE RANDOM PAUSE TAKEN BEFORE EACH DOWNLOAD
        self.maxsleep = 180

    @staticmethod
    def _highest_atel_number(content):
        """*return the highest ATel number linked to within the given text*

        **Key Arguments**

        - ``content`` -- text containing links of the form ``astronomerstelegram.org/?read=<number>``

        **Return**

        - ``number`` -- the largest ATel number found (int)

        **Raises**

        - ``IndexError`` -- if no ATel links are found in ``content``
        """
        # COMPARE AS INTEGERS - SORTING THE RAW STRINGS WOULD RANK "9999"
        # ABOVE "10000"
        numbers = [int(n)
                   for n in download._atelLinkRegex.findall(content)]
        if not numbers:
            # IndexError IS RETAINED FOR BACKWARD COMPATIBILITY: IT IS WHAT
            # INDEXING THE EMPTY LIST OF MATCHES USED TO RAISE
            raise IndexError("no ATel links found in the RSS feed content")
        return max(numbers)

    def get_latest_atel_number(
            self):
        """*get latest atel number by parsing the RSS feed for the ATel site*

        If the HTTP request for the feed fails, a message is printed and the
        interpreter exits (with status 0).

        **Return**

        - ``number`` -- the number of the latest ATel

        **Raises**

        - ``IndexError`` -- if the feed contains no ATel links

        **Usage**

        ```python
        from atelParser import download
        atels = download(
            log=log,
            settings=settings
        )
        latestNumber = atels.get_latest_atel_number()
        ```
        """
        self.log.debug('starting the ``get_latest_atel_number`` method')

        # DOWNLOAD THE RSS FEED FOR ATELS
        try:
            response = requests.get(
                url="http://www.astronomerstelegram.org/?rss",
                headers=self._requestHeaders,
            )
        except requests.exceptions.RequestException:
            print('HTTP Request failed')
            sys.exit(0)

        # PARSE ALL ATEL LINKS REPORTED AND FIND HIGHEST ATEL NUMBER
        try:
            number = self._highest_atel_number(response.text)
        except IndexError as e:
            self.log.critical(str(e))
            raise

        self.log.debug('completed the ``get_latest_atel_number`` method')
        return number

    def get_list_of_atels_still_to_download(
            self):
        """*get list of atels still to download by determining which ATels have been downloaded and diffing this against the latest ATel number*

        Only regular files in ``atel-directory`` named ``<integer>.html`` are
        counted as downloaded; any other directory entry is ignored.

        **Return**

        - ``atelNumbersToDownload`` -- a list of the ATel numbers that need downloaded (ascending order)

        **Usage**

        ```python
        from atelParser import download
        atels = download(
            log=log,
            settings=settings
        )
        atelsToDownload = atels.get_list_of_atels_still_to_download()
        ```

        """
        self.log.debug(
            'starting the ``get_list_of_atels_still_to_download`` method')

        basePath = self.settings["atel-directory"]

        # A SET GIVES CONSTANT-TIME MEMBERSHIP TESTS IN THE DIFF BELOW
        atelDownloaded = set()
        for d in os.listdir(basePath):
            stem, extension = os.path.splitext(d)
            if extension != ".html":
                continue
            if not os.path.isfile(os.path.join(basePath, d)):
                continue
            try:
                atelDownloaded.add(int(stem))
            except ValueError:
                # AN HTML FILE THAT IS NOT NAMED AFTER AN ATEL NUMBER - SKIP
                # IT RATHER THAN ABORT THE WHOLE RUN
                continue

        latestNumber = self.get_latest_atel_number()

        atelNumbersToDownload = [
            m for m in range(1, latestNumber + 1) if m not in atelDownloaded]

        self.log.debug(
            'completed the ``get_list_of_atels_still_to_download`` method')
        return atelNumbersToDownload

    def download_list_of_atels(
            self,
            atelNumbers):
        """*download the HTML files of all the missing ATels*

        Before each download the method sleeps for a random whole number of
        seconds between 1 and ``self.maxsleep``. Each page is written to
        ``<atel-directory>/<8-digit zero-padded number>.html`` as UTF-8.

        **Key Arguments**

        - ``atelNumbers`` -- the list of ATel numbers to download

        **Raises**

        - ``IOError`` -- if the output file for an ATel cannot be opened

        **Usage**

        To download new and missing ATel to your ``atel-directory`` use this code:

        ```python
        from atelParser import download
        atels = download(
            log=log,
            settings=settings
        )
        atelsToDownload = atels.get_list_of_atels_still_to_download()
        atels.download_list_of_atels(atelsToDownload)
        ```

        """
        self.log.debug('starting the ``download_list_of_atels`` method')

        for atel in atelNumbers:
            wait = random.randint(1, self.maxsleep)
            print(
                "Waiting for a randomly selected %(wait)ss before downloading ATel #%(atel)s" % locals())
            sleep(wait)
            url = 'http://www.astronomerstelegram.org/?read=%(atel)s' % locals(
            )
            response = requests.get(url, headers=self._requestHeaders)

            # DECODE BEFORE TOUCHING THE FILESYSTEM SO THAT A DECODING FAILURE
            # CANNOT LEAVE BEHIND AN EMPTY FILE THAT WOULD LATER BE MISTAKEN
            # FOR A DOWNLOADED ATEL
            html = response.content.decode("utf8")

            pathToWriteFile = self.settings[
                "atel-directory"] + "/%(atel)0.8d.html" % locals()
            try:
                self.log.debug("attempting to open the file %s" %
                               (pathToWriteFile,))
                writeFile = codecs.open(
                    pathToWriteFile, encoding='utf-8', mode='w')
            except IOError as e:
                message = 'could not open the file %s' % (pathToWriteFile,)
                self.log.critical(message)
                raise IOError(message)

            # THE CONTEXT MANAGER CLOSES THE FILE EVEN IF THE WRITE FAILS
            with writeFile:
                writeFile.write(html)

        self.log.debug('completed the ``download_list_of_atels`` method')
        return None