Coverage for sherlock/imports/ned.py: 90%

155 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-10-10 13:58 +0000

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Import ned stream into sherlock-catalogues database* 

5 

6:Author: 

7 David Young 

8""" 

9from __future__ import print_function, division 

10from ._base_importer import _base_importer 

11from fundamentals.mysql import directory_script_runner, readquery, writequery 

12from fundamentals.renderer import list_of_dictionaries 

13from astrocalc.coords import unit_conversion 

14from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables 

15from HMpTy.mysql import add_htm_ids_to_mysql_database_table 

16from neddy import namesearch, conesearch 

17from docopt import docopt 

18from datetime import datetime, date, time 

19import re 

20import string 

21import codecs 

22import pickle 

23import glob 

24import readline 

25 

26from past.utils import old_div 

27import sys 

28import os 

29os.environ['TERM'] = 'vt100' 

30 

31 

class ned(_base_importer):
    """
    *Using a list of coordinates, query the online* `NED <https://ned.ipac.caltech.edu/>`_ *database and import sources found within a given search radius of each of the locations into the sherlock-catalogues database*

    The code:

        1. Uses the list of transient coordinates and queries NED (conesearch) for the results within the given search radius
        2. Creates the `tcs_cat_ned_stream` table if it doesn't exist
        3. Adds the resulting matched NED IDs/Names to the `tcs_cat_ned_stream` table
        4. Updates the NED query history table
        5. Queries NED via NED IDs (object search) for the remaining source metadata to be added to the `tcs_cat_ned_stream` table

    Note it's up to the user to filter the input coordinate list by checking whether or not the same area of the sky has been imported into the `tcs_cat_ned_stream` table recently (by checking the `tcs_helper_ned_query_history` table)

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``coordinateList`` -- list of coordinates (a list of strings with RA and DEC space separated)
    - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*

    **Usage**

    To import the ned catalogue stream, run the following:

    ```python
    from sherlock.imports import ned
    stream = ned(
        log=log,
        settings=settings,
        coordinateList=["23.12323 -12.34343", "345.43234 45.26789"],
        radiusArcsec=180
    )
    stream.ingest()
    ```

    .. todo ::

        - test this code is still working after changes
        - add option to filter coordinate list via the `tcs_helper_ned_query_history` table
        - check sublime snippet exists
        - clip any useful text to docs mindmap
    """
    # INITIALISATION

    def ingest(self):
        """*Perform conesearches of the online NED database and import the results into the sherlock-database*

        The code:

            1. uses the list of transient coordinates and queries NED for the results within the given search radius
            2. Creates the `tcs_cat_ned_stream` table if it doesn't exist
            3. Adds the resulting NED IDs/Names to the `tcs_cat_ned_stream` table
            4. Updates the NED query history table
            5. Queries NED via NED IDs for the remaining source metadata to be added to the `tcs_cat_ned_stream` table

        **Usage**

        Having setup the NED object with a coordinate list and cone-search radius, run the `ingest()` method

        ```python
        stream.ingest()
        ```

        .. todo ::

            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug('starting the ``ingest`` method')

        # A SEARCH RADIUS IS MANDATORY - BAIL OUT EARLY IF NOT SUPPLIED
        if not self.radiusArcsec:
            self.log.error(
                'please give a radius in arcsec with which to preform the initial NED conesearch')
            sys.exit(0)

        # VARIABLES
        # SIZE OF NUMBER OF ROWS TO INSERT INTO DATABASE TABLE AT ANY ONE GO
        self.databaseInsertbatchSize = 10000

        # THE DATABASE TABLE TO STREAM THE NED DATA INTO
        self.dbTableName = "tcs_cat_ned_stream"

        dictList = self._create_dictionary_of_ned()

        tableName = self.dbTableName

        createStatement = """CREATE TABLE IF NOT EXISTS `%(tableName)s` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
  `ned_name` varchar(150) NOT NULL,
  `redshift` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `major_diameter_arcmin` double DEFAULT NULL,
  `ned_notes` varchar(700) DEFAULT NULL,
  `object_type` varchar(100) DEFAULT NULL,
  `redshift_err` double DEFAULT NULL,
  `redshift_quality` varchar(100) DEFAULT NULL,
  `magnitude_filter` varchar(10) DEFAULT NULL,
  `minor_diameter_arcmin` double DEFAULT NULL,
  `morphology` varchar(50) DEFAULT NULL,
  `hierarchy` varchar(50) DEFAULT NULL,
  `galaxy_morphology` varchar(50) DEFAULT NULL,
  `radio_morphology` varchar(50) DEFAULT NULL,
  `activity_type` varchar(50) DEFAULT NULL,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `eb_v` double DEFAULT NULL,
  `htm16ID` bigint(20) DEFAULT NULL,
  `download_error` tinyint(1) DEFAULT '0',
  `htm10ID` bigint(20) DEFAULT NULL,
  `htm13ID` bigint(20) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  UNIQUE KEY `ned_name` (`ned_name`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `raDeg` (`raDeg`),
  KEY `downloadError` (`download_error`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
""" % locals()

        self.add_data_to_database_table(
            dictList=dictList,
            createStatement=createStatement
        )

        self._update_ned_query_history()
        self._download_ned_source_metadata()

        self.log.debug('completed the ``ingest`` method')
        return None

    def _create_dictionary_of_ned(
            self):
        """*Create a list of dictionaries containing all the object ids (NED names) in the ned stream*

        **Return**

        - ``dictList`` -- a list of dictionaries containing all the object ids (NED names) in the ned stream

        **Usage**

        ```python
        dictList = stream._create_dictionary_of_ned()
        ```
        """
        self.log.debug(
            'starting the ``_create_dictionary_of_ned`` method')

        # GET THE NAMES (UNIQUE IDS) OF THE SOURCES WITHIN THE CONESEARCH FROM
        # NED
        names, searchParams = conesearch(
            log=self.log,
            radiusArcsec=self.radiusArcsec,
            nearestOnly=False,
            unclassified=True,
            quiet=False,
            listOfCoordinates=self.coordinateList,
            outputFilePath=False,
            verbose=False
        ).get_crossmatch_names()

        # ONE ROW PER NED NAME - METADATA IS FILLED IN LATER BY THE
        # NAMESEARCH STAGE
        dictList = []
        dictList[:] = [{"ned_name": n} for n in names]

        self.log.debug(
            'completed the ``_create_dictionary_of_ned`` method')
        return dictList

    def _update_ned_query_history(
            self):
        """*Update the database helper table to give details of the ned cone searches performed*

        *Usage:*

        ```python
        stream._update_ned_query_history()
        ```
        """
        self.log.debug('starting the ``_update_ned_query_history`` method')

        # UPDATE THE DATABASE HELPER TABLE TO GIVE DETAILS OF THE NED CONE
        # SEARCHES PERFORMED
        dataList = []
        for coord in self.coordinateList:
            # COORDINATES MAY ARRIVE AS "RA DEC" STRINGS OR (RA, DEC)
            # TUPLES/LISTS - ANYTHING ELSE IS SKIPPED RATHER THAN ALLOWING
            # STALE ra/dec VALUES FROM A PREVIOUS ITERATION TO LEAK THROUGH
            if isinstance(coord, ("".__class__, u"".__class__)):
                ra = coord.split(" ")[0]
                dec = coord.split(" ")[1]
            elif isinstance(coord, (tuple, list)):
                ra = coord[0]
                dec = coord[1]
            else:
                self.log.warning(
                    'could not parse coordinate `%(coord)s` - skipping' % locals())
                continue

            dataList.append(
                {"raDeg": ra,
                 "decDeg": dec,
                 "arcsecRadius": self.radiusArcsec}
            )

        if len(dataList) == 0:
            return None

        # CREATE TABLE IF NOT EXIST
        createStatement = """CREATE TABLE IF NOT EXISTS `tcs_helper_ned_query_history` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `arcsecRadius` int(11) DEFAULT NULL,
  `dateQueried` datetime DEFAULT CURRENT_TIMESTAMP,
  `htm16ID` bigint(20) DEFAULT NULL,
  `htm13ID` int(11) DEFAULT NULL,
  `htm10ID` int(11) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `dateQueried` (`dateQueried`),
  KEY `dateHtm16` (`dateQueried`,`htm16ID`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
        writequery(
            log=self.log,
            sqlQuery=createStatement,
            dbConn=self.cataloguesDbConn
        )

        # USE dbSettings TO ACTIVATE MULTIPROCESSING
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dataList,
            dbTableName="tcs_helper_ned_query_history",
            uniqueKeyList=[],
            dateModified=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"][
                "static catalogues"]
        )

        # INDEX THE TABLE FOR LATER SEARCHES
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="tcs_helper_ned_query_history",
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName="primaryId",
            dbSettings=self.settings["database settings"]["static catalogues"]
        )

        self.log.debug('completed the ``_update_ned_query_history`` method')
        return None

    def _download_ned_source_metadata(
            self):
        """*Query NED using the names of the NED sources in our local database to retrieve extra metadata*

        *Usage:*

        ```python
        stream._download_ned_source_metadata()
        ```
        """
        self.log.debug('starting the ``_download_ned_source_metadata`` method')

        self.dbTableName = "tcs_cat_ned_stream"

        total, batches = self._count_ned_sources_in_database_requiring_metadata()

        print(
            "%(total)s galaxies require metadata. Need to send %(batches)s batch requests to NED." % locals())

        self.log.info(
            "%(total)s galaxies require metadata. Need to send %(batches)s batch requests to NED." % locals())

        thisCount = 0

        # FOR EACH BATCH, GET THE GALAXY IDs, QUERY NED AND UPDATE THE DATABASE
        # THEN RECOUNT TO DETERMINE IF THERE ARE REMAINING SOURCES TO GRAB
        # METADATA FOR (self.total IS REFRESHED BY THE RECOUNT CALL)
        while self.total:
            thisCount += 1
            self._get_ned_sources_needing_metadata()
            self._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
                thisCount)
            self._count_ned_sources_in_database_requiring_metadata()

        self.log.debug(
            'completed the ``_download_ned_source_metadata`` method')
        return None

    def _get_ned_sources_needing_metadata(
            self):
        """*Get the names of 50000 or less NED sources that still require metadata in the database*

        **Return**

        - ``len(self.theseIds)`` -- the number of NED IDs returned

        *Usage:*

        ```python
        numberSources = stream._get_ned_sources_needing_metadata()
        ```
        """
        self.log.debug(
            'starting the ``_get_ned_sources_needing_metadata`` method')

        tableName = self.dbTableName

        # SELECT THE DATA FROM NED TABLE
        # NOTE: an earlier variant of this query also filtered on
        # `raDeg is null`; the current behaviour re-requests every row not
        # flagged with a download error
        sqlQuery = u"""
            select ned_name from %(tableName)s where (download_error != 1 or download_error is null) limit 50000;
        """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )

        # ESCAPE DOUBLE QUOTES SO THE NAMES CAN LATER BE EMBEDDED IN A
        # QUOTED SQL `in (...)` LIST
        self.theseIds = []
        self.theseIds[:] = [r["ned_name"].replace('"', '\\"') for r in rows]

        self.log.debug(
            'completed the ``_get_ned_sources_needing_metadata`` method')

        return len(self.theseIds)

    def _do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
            self,
            batchCount):
        """*Query NED via name search and add result metadata to database*

        **Key Arguments**

        - ``batchCount`` -- the index number of the batch sent to NED (only needed for printing to STDOUT to give user idea of progress)

        *Usage:*

        ```python
        numberSources = stream._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(batchCount=10)
        ```
        """
        self.log.debug(
            'starting the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method')

        # ASTROCALC UNIT CONVERTER OBJECT
        converter = unit_conversion(
            log=self.log
        )
        tableName = self.dbTableName

        # QUERY NED WITH BATCH
        totalCount = len(self.theseIds)
        print("requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % locals())
        # QUERY THE ONLINE NED DATABASE USING NEDDY'S NAMESEARCH METHOD
        search = namesearch(
            log=self.log,
            names=self.theseIds,
            quiet=True
        )
        results = search.get()
        print("results returned from ned -- starting to add to database")

        # CLEAN THE RETURNED DATA AND UPDATE DATABASE
        totalCount = len(results)
        count = 0
        sqlQuery = ""
        dictList = []
        for thisDict in results:
            thisDict["tableName"] = tableName
            count += 1
            for k, v in list(thisDict.items()):
                # EMPTY/MISSING VALUES BECOME THE SENTINEL "null" (mapped to
                # python None further down)
                if not v:
                    thisDict[k] = "null"
                # STRIP NED'S UNCERTAINTY MARKERS FROM DIAMETER VALUES SO
                # THEY CAN BE STORED AS DOUBLES
                if k in ["major_diameter_arcmin", "minor_diameter_arcmin"] and (":" in v or "?" in v or "<" in v):
                    thisDict[k] = v.replace(":", "").replace(
                        "?", "").replace("<", "")
                if isinstance(v, ("".__class__, u"".__class__)) and '"' in v:
                    thisDict[k] = v.replace('"', '\\"')
            if "Input name not" not in thisDict["input_note"] and "Same object as" not in thisDict["input_note"]:
                try:
                    thisDict["raDeg"] = converter.ra_sexegesimal_to_decimal(
                        ra=thisDict["ra"]
                    )
                    thisDict["decDeg"] = converter.dec_sexegesimal_to_decimal(
                        dec=thisDict["dec"]
                    )
                except Exception:
                    name = thisDict["input_name"]
                    self.log.warning(
                        "Could not convert the RA & DEC for the %(name)s NED source" % locals())
                    continue
                thisDict["eb_v"] = thisDict["eb-v"]
                thisDict["ned_name"] = thisDict["input_name"]
                row = {}
                for k in ["redshift_quality", "redshift", "hierarchy", "object_type", "major_diameter_arcmin", "morphology", "magnitude_filter", "ned_notes", "eb_v", "raDeg", "radio_morphology", "activity_type", "minor_diameter_arcmin", "decDeg", "redshift_err", "ned_name"]:
                    if thisDict[k] == "null":
                        row[k] = None
                    else:
                        row[k] = thisDict[k]

                # NOTE(review): leftover debug trap - an unescaped double
                # quote in a NED name kills the whole import; consider
                # replacing with a warning + skip
                if '"' in thisDict["ned_name"]:
                    print(thisDict)
                    print(thisDict["ned_name"])
                    sys.exit(0)

                dictList.append(row)

        self.add_data_to_database_table(
            dictList=dictList,
            createStatement="""SET SESSION sql_mode="";"""
        )

        # FLAG THE WHOLE BATCH AS ATTEMPTED SO IT IS NOT RE-REQUESTED BY THE
        # NEXT RECOUNT
        theseIds = ("\", \"").join(self.theseIds)

        sqlQuery = u"""
            update %(tableName)s set download_error = 1 where ned_name in ("%(theseIds)s");
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        print("%(count)s/%(totalCount)s galaxy metadata batch entries added to database" % locals())
        if count < totalCount:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")

        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s"
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method')
        return None

    def _count_ned_sources_in_database_requiring_metadata(
            self):
        """*Count the sources in the NED table requiring metadata*

        **Return**

        - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED

        *Usage:*

        ```python
        totalRemaining, numberOfBatches = stream._count_ned_sources_in_database_requiring_metadata()
        ```
        """
        self.log.debug(
            'starting the ``_count_ned_sources_in_database_requiring_metadata`` method')

        tableName = self.dbTableName

        # NOTE: an earlier variant of this query also filtered on
        # `raDeg is null` - keep in sync with
        # `_get_ned_sources_needing_metadata` if reinstated
        sqlQuery = u"""
            select count(*) as count from %(tableName)s where (download_error != 1 or download_error is null)
        """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )
        self.total = rows[0]["count"]
        # 50000 NAMES PER NED NAMESEARCH REQUEST
        self.batches = int(old_div(self.total, 50000.)) + 1

        if self.total == 0:
            self.batches = 0

        self.log.debug(
            'completed the ``_count_ned_sources_in_database_requiring_metadata`` method')
        return self.total, self.batches

    # use the tab-trigger below for new method
    # xt-class-method