Coverage for sherlock/imports/_base_importer.py: 95%

91 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-10-10 13:58 +0000

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*The base importer for sherlock catalogue imports* 

5 

6:Author: 

7 David Young 

8 

9.. todo :: 

10 

11 - document this module 

12""" 

13from __future__ import print_function 

14from builtins import str 

15from builtins import object 

16import sys 

17import os 

18os.environ['TERM'] = 'vt100' 

19import readline 

20import glob 

21import pickle 

22import codecs 

23import re 

24import string 

25from sherlock.database_cleaner import database_cleaner 

26from datetime import datetime, date, time 

27from docopt import docopt 

28from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables, directory_script_runner, writequery 

29from fundamentals.renderer import list_of_dictionaries 

30from HMpTy.mysql import add_htm_ids_to_mysql_database_table 

31 

32 

class _base_importer(object):
    """
    *The base importer object used to import new catalogues into the sherlock-catalogues database*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``pathToDataFile`` -- path to the file containing the data to import
    - ``version`` -- version number of the catalogue to be imported (e.g. DR12)
    - ``catalogueName`` -- name of the catalogue to be imported
    - ``coordinateList`` -- list of coordinates (needed for some streamed tables). Default *None*, stored as a new empty list
    - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*

    **Attributes set during initialisation**

    - ``transientsDbConn`` / ``cataloguesDbConn`` -- the "transients" and "catalogues" connections returned by ``sherlock.database.connect()``
    - ``catData`` -- full content of ``pathToDataFile``, or *None* if no path was given
    - ``dbTableName`` -- ``tcs_cat_<catalogueName><version suffix>``
    - ``primaryIdColumnName``, ``databaseInsertbatchSize``, ``raColName``, ``declColName``, ``uniqueKeyList`` -- defaults a subclass can supersede
    - ``reDatetime`` -- compiled regex matching a leading ``YYYY-MM-DDT``

    **Usage**

    To use this base class to write a new importer, create your class like so:

    ```python
    class newImporter(_base_importer):
        ...
    ```

    """
    # INITIALISATION

    def __init__(
            self,
            log,
            settings=False,
            pathToDataFile=False,
            version=False,
            catalogueName="",
            coordinateList=None,
            radiusArcsec=False
    ):
        self.log = log
        log.debug("instansiating a new '_base_importer' object")
        self.settings = settings
        self.pathToDataFile = pathToDataFile
        self.version = version
        self.catalogueName = catalogueName
        # A FRESH LIST PER INSTANCE - A LITERAL `[]` DEFAULT WOULD BE SHARED
        # BY EVERY IMPORTER CREATED WITHOUT THIS ARGUMENT
        self.coordinateList = [] if coordinateList is None else coordinateList
        self.radiusArcsec = radiusArcsec
        self.myPid = str(os.getpid())
        # xt-self-arg-tmpx

        # INITIAL ACTIONS
        # SETUP ALL DATABASE CONNECTIONS
        # NOTE(review): imported here rather than at module level, presumably
        # to avoid a circular import with the sherlock package - confirm
        from sherlock import database
        db = database(
            log=self.log,
            settings=self.settings
        )
        # dbVersions IS RETURNED BY connect() BUT NOT USED BY THIS CLASS
        dbConns, dbVersions = db.connect()
        self.transientsDbConn = dbConns["transients"]
        self.cataloguesDbConn = dbConns["catalogues"]

        # READ THE FILE TO IMPORT THE DATA FROM
        if pathToDataFile:
            self.catData = self._read_data_file(pathToDataFile)
        else:
            self.catData = None

        # GET THE VERSION TO APPEND TO THE DATABASE TABLE NAME FOR THE
        # CATALOGUE
        self.version = self._version_suffix(self.version)

        # BUILD THE DATABASE TABLE NAME
        self.dbTableName = "tcs_cat_%(catalogueName)s%(version)s" % {
            "catalogueName": catalogueName,
            "version": self.version
        }

        # SOME DEFAULT OBJECT ATTRIBUTES THAT CAN BE SUPERSEDED
        self.primaryIdColumnName = "primaryId"
        # NOTE(review): not read anywhere in this class -
        # `add_data_to_database_table` passes a fixed batchSize of 10000
        self.databaseInsertbatchSize = 2500
        self.raColName = "raDeg"
        self.declColName = "decDeg"
        self.uniqueKeyList = [self.raColName, self.declColName]

        # DATETIME REGEX - EXPENSIVE OPERATION, LET'S JUST DO IT ONCE
        self.reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

        return None

    def _read_data_file(
            self,
            pathToReadFile):
        """*Read and return the full content of the catalogue data file*

        **Key Arguments**

        - ``pathToReadFile`` -- path to the file containing the data to import

        **Return**

        - the content of the file as returned by ``read()``

        **Raises**

        - ``IOError`` -- if the file cannot be opened or read (also logged as critical)
        """
        try:
            self.log.debug("attempting to open the file %s" %
                           (pathToReadFile,))
            # THE CONTEXT MANAGER CLOSES THE HANDLE EVEN IF read() FAILS
            with codecs.open(pathToReadFile, mode='r') as readFile:
                return readFile.read()
        except IOError:
            message = 'could not open the file %s' % (pathToReadFile,)
            self.log.critical(message)
            raise IOError(message)

    @staticmethod
    def _version_suffix(
            version):
        """*Convert a catalogue version into the suffix appended to the database table name*

        Spaces are removed, every lowercase ``v`` is removed (not only a leading one) and dots become underscores, e.g. ``v1.2`` gives ``_v1_2``.

        **Key Arguments**

        - ``version`` -- the catalogue version string, or a falsy value for no version

        **Return**

        - the suffix string, or an empty string if ``version`` is falsy
        """
        if not version:
            return ""
        return "_v" + \
            version.replace(" ", "").replace(
                "v", "").replace(".", "_")

    def add_data_to_database_table(
            self,
            dictList,
            createStatement=False):
        """*Import data in the list of dictionaries in the requested database table*

        Also adds HTMIDs and updates the sherlock-catalogue database helper table with the time-stamp of when the imported catalogue was last updated

        **Key Arguments**

        - ``dictList`` - a list of dictionaries containing all the rows in the catalogue to be imported. Nothing is done if this is empty or *None*
        - ``createStatement`` - the table's mysql create statement (used to generate table if it does not yet exist in database). Default *False*

        **Return**

        - *None*

        **Usage**

        ```python
        self.add_data_to_database_table(
            dictList=dictList,
            createStatement=createStatement
        )
        ```

        .. todo ::

            - Write a checklist for creating a new sherlock database importer
        """
        self.log.debug('starting the ``add_data_to_database_table`` method')

        # NOTHING TO IMPORT - LEAVE BEFORE TOUCHING THE DATABASE
        if not dictList:
            return None

        dbTableName = self.dbTableName

        if createStatement:
            writequery(
                log=self.log,
                sqlQuery=createStatement,
                dbConn=self.cataloguesDbConn,
            )

        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dictList,
            dbTableName=dbTableName,
            uniqueKeyList=[],
            dateModified=True,
            dateCreated=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"][
                "static catalogues"]
        )

        self._add_htmids_to_database_table()

        cleaner = database_cleaner(
            log=self.log,
            settings=self.settings
        )
        cleaner._update_tcs_helper_catalogue_tables_info_with_new_tables()

        self._update_database_helper_table()

        # THE MANUAL FOLLOW-UP CHECKLIST IS SKIPPED FOR TABLES WITH
        # "ned_stream" IN THEIR NAME
        if "ned_stream" not in dbTableName:
            print("""Now:

 - [ ] edit the `%(dbTableName)s` row in the sherlock catalogues database adding relevant column mappings, catalogue version number etc
 - [ ] retire any previous version of this catlogue in the database. Renaming the catalogue-table by appending `legacy_` and also change the name in the `tcs_helper_catalogue_tables_info` table
 - [ ] dupliate views from the previous catalogue version to point towards the new version and then delete the old views
 - [ ] run the command `sherlock clean [-s <pathToSettingsFile>]` to clean up helper tables
 - [ ] switch out the old catalogue table/views in your sherlock search algorithms in the yaml settings files
 - [ ] run a test batch of transients to make sure catalogue is installed as expected

""" % {"dbTableName": dbTableName})

        self.log.debug('completed the ``add_data_to_database_table`` method')
        return None

    def _add_htmids_to_database_table(
            self):
        """*Add HTMIDs to database table once all the data has been imported (HTM Levels 10,13,16)*

        Uses the ``raColName``, ``declColName`` and ``primaryIdColumnName`` attributes of the importer, so a subclass can supersede them before this is called.

        **Return**

        - *None*

        **Usage**

        ```python
        self._add_htmids_to_database_table()
        ```

        """
        self.log.debug('starting the ``add_htmids_to_database_table`` method')

        tableName = self.dbTableName

        self.log.info("Adding HTMIds to %(tableName)s" %
                      {"tableName": tableName})

        add_htm_ids_to_mysql_database_table(
            raColName=self.raColName,
            declColName=self.declColName,
            tableName=tableName,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName=self.primaryIdColumnName,
            dbSettings=self.settings["database settings"]["static catalogues"]
        )

        self.log.debug('completed the ``add_htmids_to_database_table`` method')
        return None

    def _update_database_helper_table(
            self):
        """*Update the sherlock catalogues database helper table with the time-stamp of when this catalogue was last updated*

        Sets ``last_updated`` to ``now()`` on the ``tcs_helper_catalogue_tables_info`` row whose ``table_name`` matches this importer's table.

        **Return**

        - *None*

        **Usage**

        ```python
        self._update_database_helper_table()
        ```

        """
        self.log.debug('starting the ``_update_database_helper_table`` method')

        # THE TABLE NAME IS BUILT BY THIS CLASS FROM THE CATALOGUE NAME AND
        # VERSION GIVEN TO THE CONSTRUCTOR AND IS INTERPOLATED UNESCAPED
        tableName = self.dbTableName

        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s";
        """ % {"tableName": tableName}

        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_update_database_helper_table`` method')
        return None

    # use the tab-trigger below for new method
    # xt-class-method