Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Import ATel into MySQL database and parse for names and coordinates*
6:Author:
7 David Young
8"""
9from builtins import str
10from builtins import range
11from builtins import object
12import sys
13import os
14os.environ['TERM'] = 'vt100'
15from fundamentals import tools, times
16import re
17import sys
18from datetime import datetime
19from fundamentals.mysql import database, readquery, writequery, convert_dictionary_to_mysql_table
20import codecs
21from astrocalc.coords import unit_conversion
class mysql(object):
    """
    *Import ATel into MySQL database and parse for names and coordinates*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``reParse`` -- re-parse all existing atels? Useful if new names have been added to the parse-list

    **Usage**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

    To initiate a mysql object, use the following:

    ```python
    from atelParser import mysql
    parser = mysql(
        log=log,
        settings=settings
    )
    ```
    """

    # ------------------------------------------------------------------
    # REGEXES USED TO PULL THE TOP-LEVEL 'ELEMENT' DATA OUT OF AN ATEL
    # HTML PAGE. COMPILED ONCE HERE INSTEAD OF ONCE PER FILE.
    # ------------------------------------------------------------------
    _RE_TITLE = re.compile(
        r'<TITLE>.*?#\d{1,5}:\s?(.*?)\s?<\/TITLE>', re.M | re.I)
    _RE_ATEL_NUMBER = re.compile(
        r'<P ALIGN=CENTER>\s?ATel\s?#(\d{1,5})', re.M | re.I)
    _RE_WHO = re.compile(
        r'<A HREF=\"mailto:([\w.\-@]*)\">(.*?)<', re.M | re.I)
    _RE_DATETIME = re.compile(
        r'<STRONG>(\d{1,2}\s\w{1,10}\s\d{4});\s(\d{1,2}:\d{2})\sUT</STRONG>', re.M | re.I)
    _RE_TAGS = re.compile(
        r'<p class="subjects">Subjects: (.*?)</p>', re.M | re.I)
    _RE_USER_TEXT = re.compile(
        r'</div id="subjects">.*?(<div id="references">.*?</div id="references">)?<P>(.*)</P>.*?(<a href="http://twitter.com/share|</TD><TD>)', re.S | re.I)
    _RE_ONE_REF = re.compile(
        r'http:\/\/www.astronomerstelegram.org\/\?read=(\d{1,5})', re.M | re.I)
    _RE_BACK_REFS = re.compile(
        r'<div id="references">(.*?)</div id="references">', re.M | re.I)
    _RE_ONE_BACK_REF = re.compile(
        r'<A HREF="http:\/\/www.astronomerstelegram.org\/\?read=(\d{1,7})">\1</a>', re.M | re.I)
    _RE_SQL_DATETIME = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # ------------------------------------------------------------------
    # KEYWORD REGEXES USED TO GUESS THE 'TYPE' OF AN ATEL
    # ------------------------------------------------------------------
    _RE_SN_TYPE = re.compile(
        r'type\s(I[abcilps]{1,3}n?)|(\bI[abcilnps]{1,3}n?)\s(SN|supernova)|<td>\s?\b(I[abcilps]{1,3}n?)\b\s?<\/td>|(SN\simpostor)|\|\s?\b(I[abcilps]{1,3}n?)\b\s?\||(SN|supernova)\s?(I[abcilps]{1,3}n?)', re.S | re.I)
    _RE_DISC = re.compile(
        r"""(discovered\sby\sMASTER|Detection.{1,20}MASTER|detection\sof\sa\snew\s|discovery|candidate.{1,10}discovered|\ba\s?candidate|\d{1,4}:\s((Bright|MASTER)\sPSN\sin|Possible\snew\s|(A\s)?new.{1,30}(candidate|discovered)|(Bright|MASTER).{1,20}detection))""", re.I | re.M)
    _RE_DISC_PHRASE = re.compile(
        r"""(We\sreport\sthe\sdiscovery\s)""", re.I)
    _RE_CLASS = re.compile(
        r'(classification|SNID|spectroscopic\sconfirmation|GELATO|discovery.*?SN\sI[abcilps]{1,3}n?)', re.I)
    _RE_CLASS2 = re.compile(
        r'(\sis\sa\s|SN\simpostor|type\sI[abcilps]{0,3}n?|\sI[abcilps]{0,3}n?\ssupernova|\sa\sSN\sI[abcilps]{0,3}n?)', re.I)
    _RE_OBS = re.compile(
        r'(observations?|Outburst\sof\s|increase\sin\sflux\s|Progenitor\sIdentification|observed?|detects|new\soutburst|monitoring\sof)', re.I)
    _RE_COR = re.compile(r'((Correction|Erratum|Errata)\sto)', re.I)

    # OBJECT-NAME PATTERNS SEARCHED FOR IN THE ATEL TITLE AND BODY
    _NAME_PATTERNS = [
        r"""(PSN|PNV)\s?J\d{8}(\+|-|–)\d{3}(\+|-|–)?\d{3,4}""",
        r"""(SN|Supernova)\s?(19|20)\d{2}[A-Za-z]{1,4}""",
        r"""GX\s?\d{3}(\+|-|–)\d""",
        r"""Fermi\s?J\d{4}(\+|-|–)\d{4}""",
        r"""PHL\s?\d{3}""",
        r"""QSO\s?B\d{4}(\+|-|–)\d{3}""",
        r"""i?PTF(0|1)\d[a-zA-Z]{1,3}""",
        r"""MASTER\s?((short\s)?ot\s)?J?\d{6}\.\d{2}(\+|-|–)\d{6}\.\d""",
        r"""(FSRQ\s?)?PKS\s?\d{4}(\+|-|–)\d{3}""",
        r"""BZQ\s?J\d{4}(\+|-|–)\d{4}""",
        r"""(SN(-|–))?LSQ1\d[a-zA-Z]{1,4}""",
        r"""M31N\s?(19|20)\d{2}(\+|-|–)\d{2}[a-z]""",
        r"""IGR\s?J?\d{5}(\+|-|–)?\d{1,4}""",
        r"""GRS\s?\d{4}(\+|-|–)\d{1,4}""",
        r"""PS1(-|–)?(0|1)\d[a-zA-Z]{1,3}""",
        r"""PS1\d[a-zA-Z]{1,3}""",
        r"""SDSS\s(galaxy\s)?J\d{6}\.\d{2}(\+|-|–)\d{6}\.\d""",
        r"""(CSS|MLS|SSS)\d{6}:\d{6}(\+|-|–)\d{6}""",
        r"""XMM(U|SL1)\s?J\d{6}\.\d{1}(\+|-|–)\d{6}""",
        r"""SAX\s?J\d{4}\.\d(\+|-|–)\d{3,4}""",
        r"""1RXS\s?J\d{6}\.\d(\+|-|–)\d{6}""",
        r"""USNO(-|–)(B1|A2)\.0\s?(catalogue\s?)\d{4}(-|–)\d{7}""",
        r"""KS\s?\d{4}(\+|-|–)\d{3}""",
        r"""AX\s?J\d{4}\.\d(\+|-|–)\d{4}""",
        r"""2MAS(S|X)\s?J?\d{8}(\+|-|–)\d{7}""",
        r"""SWIFT\s?J\d{4,6}\.\d(\+|-|–)\d{1,6}""",
        r"""4U\s?\d{4}(\+|-|–)\d{2,4}""",
        r"""Hen\s\d{1}(\+|-|–)\d{4}""",
        r"""(HMXB\s?)?XTE\s?J?\d{4}(\+|-|–)\d{3}""",
        r"""MAXI\s?J?\d{4}(\+|-|–)\d{3}""",
        r"""PG\s?J?\d{4}(\+|-|–)\d{3}""",
        r"""PMN\s?J?\d{4}(\+|-|–)\d{4}""",
        r"""Guide\sStar\sCatalog\sN4HU\d{6}""",
        # NOTE(review): THE LITERAL `\.8` BELOW LOOKS LIKE IT MAY HAVE BEEN
        # MEANT AS `\.\d` -- LEFT UNCHANGED, PLEASE CONFIRM
        r"""CXOGBS\s?J?\d{6}\.8(\+|-|–)\d{6}""",
        r"""Galactic\sPlane\s(gamma-ray\s)?Transient\sJ?\d{4}(\+|-|–)\d{4}""",
        r"""TXS\s\d{4}(\+|-|–)\d{3}""",
        r"""V\d{4}\sSgr""",
        r"""Aql\sX(\+|-|–)1""",
        r"""BLAZAR\s[a-zA-Z\d]{2}\s?\d{3,4}((\+|-|–)\d{2})?""",
        r"""SNhunt\s\d{1,5}""",
        r"""Nova\s[a-zA-Z]{3}\s(19|20)\d{2}""",
        r"""GRB\s?\d{6}[a-zA-Z]{1,2}""",
        r"""\bV\d{3,4}\s(Sagittarii|cyg)""",
        # WAS `\d4` (A DIGIT FOLLOWED BY A LITERAL 4) WHICH CAN NEVER MATCH A
        # NAME LIKE `SGR 1806-20`; FOUR DIGITS IS WHAT WAS INTENDED
        r"""SGR\s\d{4}(\+|-|–)\d{2}""",
        r"""(QSO|3EG|2FGL)\s?J?\d{4}(\.\d)?(\+|-|–)\d{4}""",
        r"""BL\sLacertae""",
        r"""\bCTA\s\d{3}""",
        r"""ASASSN( |–|-)1\d[a-zA-Z]{1,4}""",
        r"""OGLE-201\d-(SN|NOVA)-\d{1,4}""",
        r"""OGLE ?1\d[a-zA-Z]{1,4}""",
        r"""Gaia ?1\d[a-zA-Z]{1,4}""",
        r"""DES1\d[a-zA-Z]\d[a-zA-Z]{1,4}""",
        r"""HFF1\d[a-zA-Z]{1,4}""",
        r"""HSC-SN1\d[a-zA-Z]{1,4}""",
        r"""MASTER ?J\d{5,6}\.\d{2}\+\d{5,6}\.\d{1,2}""",
        r"""SKY( |-|–|_)J\d{6,8}(-|–|\+)\d{6,8}""",
        r"""SMT ?\d{6,8}(-|–|\+)\d{6,8}""",
        r"""SN20\d{2}[a-zA-Z]{1,4}""",
        r"""TCP ?J\d{6,8}(-|–|\+)\d{6,8}""",
        r"""ATLAS\d{2}\w{1,8}""",
        r"""AT20\d{2}[a-zA-Z]{1,4}""",
        r"""ZTF\d{2}[a-zA-Z]{1,15}"""
    ]

    # NIGGLY STRINGS STRIPPED FROM THE ATEL BODY TO MAKE PARSING EASIER
    _STRINGS_TO_REMOVE = [
        u"<p>",
        u"</p>",
        u"<P>",
        u"</P>",
        u"<P ALIGN=CENTER><EM><A HREF='http://'></A></EM>",
        u"<pre>",
        u"</pre>",
        u"#",
        u"<b>",
        u"</b>",
        u"<br>",
        u"</br>",
        u"<P ALIGN=CENTER>",
        u"<EM>",
        u"</EM>",
        u"<sup>",
        u"</center>",
        u"<center>",
        u"</sup>",
        u"<sub>",
        u"</sub>",
        u"<SUP>",
        u"</CENTER>",
        u"<CENTER>",
        u"</SUP>",
        u"<SUB>",
        u"</SUB>",
        u"<br />",
        u"<pre />",
        u"<pre/>",
        u"<PRE>",
        u"<Pre>",
        u"<it>",
        u"</it>",
        u"<A ",
        u"</a>",
        u"</A>",
        u"<a ",
        u"_",
        u"--",
        u"</BR>",
        u"<BR>",
        u"°",
        u"</div>",
        u"<div>",
        u"Ã?Â",
        # NOTE(review): THIS ENTRY IS A SINGLE WHITESPACE CHARACTER IN THE
        # SOURCE. STRIPPING ORDINARY SPACES WOULD DEFEAT EVERY `\s`-BASED
        # REGEX BELOW, SO IT IS TAKEN TO BE A NON-BREAKING SPACE -- CONFIRM
        u"\u00a0",
        u"***",
        u"<B>",
        u"</B>",
        u"\n"
    ]

    # Initialisation
    def __init__(
            self,
            log,
            settings=False,
            reParse=False
    ):
        """Store the logger/settings and open the database connection.

        ``settings`` must be a dictionary containing a "database settings"
        entry; the default of ``False`` is kept for interface compatibility
        but cannot be subscripted, so a settings dictionary is effectively
        required.
        """
        self.log = log
        log.debug("instansiating a new 'mysql' object")
        self.settings = settings
        self.reParse = reParse

        # SETUP ALL DATABASE CONNECTION
        self.dbSettings = settings["database settings"]
        self.dbConn = database(
            log=log,
            dbSettings=self.dbSettings
        ).connect()

        return None

    @staticmethod
    def _parse_atel_html(html):
        """*Extract the top-level 'element' data from one ATel HTML page.*

        **Key Arguments**

        - ``html`` -- the content of the downloaded ATel page (string)

        **Return**

        - ``elementDict`` -- dictionary with the keys ``title``, ``atelNumber``, ``email``, ``authors``, ``datePublished``, ``tags``, ``userText``, ``refList`` and ``backRefList``; or ``None`` if no title is found (i.e. the ATel does not exist yet). ``datePublished`` is ``None`` when the publication date is missing or unparsable.
        """
        cls = mysql
        elementDict = {}

        # ATEL TITLE -- NO TITLE MEANS THE ATEL DOES NOT EXIST YET
        matchObject = cls._RE_TITLE.search(html)
        if not matchObject:
            return None
        elementDict['title'] = matchObject.group(1)

        # ATEL NUMBER
        matchObject = cls._RE_ATEL_NUMBER.search(html)
        elementDict['atelNumber'] = matchObject.group(
            1) if matchObject else None

        # ATEL AUTHORS
        matchObject = cls._RE_WHO.search(html)
        elementDict['email'] = matchObject.group(1) if matchObject else None
        elementDict['authors'] = matchObject.group(
            2) if matchObject else None

        # ATEL DATETIME -- PREVIOUSLY A MISSING DATE RAISED A TypeError WHEN
        # `None + " " + None` WAS EVALUATED
        datePublished = None
        matchObject = cls._RE_DATETIME.search(html)
        if matchObject:
            try:
                datePublished = datetime.strptime(
                    matchObject.group(1) + " " + matchObject.group(2), '%d %b %Y %H:%M')
            except ValueError:
                datePublished = None
        elementDict['datePublished'] = datePublished

        # ATEL TAGS
        matchObject = cls._RE_TAGS.search(html)
        elementDict['tags'] = matchObject.group(1) if matchObject else None

        # ATEL USER ADDED TEXT
        matchObject = cls._RE_USER_TEXT.search(html)
        userText = matchObject.group(2) if matchObject else None
        elementDict['userText'] = userText

        # FIND REFS IN USER ADDED TEXT (DUPLICATES REMOVED, NUMERIC ORDER SO
        # THE STORED STRING IS REPRODUCIBLE)
        refList = cls._RE_ONE_REF.findall(userText) if userText else []
        elementDict['refList'] = ", ".join(sorted(set(refList), key=int))

        # ATEL BACK REFERENCES - FIND EXTRA BACK REFS IN REFERENCE DIV
        matchObject = cls._RE_BACK_REFS.search(html)
        backRefs = matchObject.group(1) if matchObject else None
        backRefList = cls._RE_ONE_BACK_REF.findall(
            backRefs) if backRefs else []
        elementDict['backRefList'] = ", ".join(
            sorted(set(backRefList), key=int))

        return elementDict

    def atels_to_database(
            self):
        """*Parse ATels into a mysql db.*

        Parser to add ATels into a mysql db - each ATel has 'element' data (top level - title, author ...) and 'item' data (object specific data - ra, dec, mag, name ...). The parser will add one row per 'item' (object) into the db table

        HTML files whose name is not an ATel number, pages without a title and pages without a parsable publication date are skipped. An ``IOError`` is raised if a downloaded file cannot be opened.

        **Usage**

        ```python
        from atelParser import mysql
        parser = mysql(
            log=log,
            settings=settings
        )
        parser.atels_to_database()
        ```
        """
        self.log.debug('starting the ``atels_to_database`` method')

        self._create_atel_database_tables()

        # LIST ALL PARSED ATEL NUMBERS IN DATABASE
        sqlQuery = u"""
            SELECT distinct atelNumber
            FROM atel_fullcontent
            ORDER BY atelNumber DESC
        """
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn,
            quiet=False
        )
        # A SET GIVES CONSTANT-TIME LOOKUPS; THE COLUMN IS NULLABLE SO SKIP
        # NULL ATEL NUMBERS RATHER THAN CRASH ON int(None)
        databaseAtels = set(
            int(l['atelNumber']) for l in rows if l['atelNumber'] is not None)

        # LIST HTML ATEL FILES DOWNLOADED & KEEP THOSE NOT YET IN THE DATABASE
        basePath = self.settings["atel-directory"]
        atelsToParse = []
        for d in os.listdir(basePath):
            filePath = os.path.join(basePath, d)
            if ".html" not in d or not os.path.isfile(filePath):
                continue
            try:
                downloadedNumber = int(d.replace(".html", ""))
            except ValueError:
                self.log.debug(
                    "skipping %s - file name is not an ATel number" % (d,))
                continue
            if downloadedNumber not in databaseAtels:
                # USE THE REAL FILE PATH RATHER THAN REBUILDING A ZERO-PADDED
                # NAME THAT MAY NOT EXIST ON DISK
                atelsToParse.append(filePath)

        # LOOP THROUGH THE ATEL FILES AND ADD THE VARIOUS HTML ELEMENTS AND TAGS TO
        # DB
        for atel in atelsToParse:

            # READ HTML FILE
            try:
                self.log.debug("attempting to open the file %s" % (atel,))
                with codecs.open(atel, encoding='utf-8', mode='r') as readFile:
                    html = readFile.read()
            except IOError as e:
                message = 'could not open the file %s' % (atel,)
                self.log.critical(message)
                raise IOError(message)

            elementDict = self._parse_atel_html(html)
            if elementDict is None:
                # NO TITLE FOUND IN THE HTML DOC (i.e. ATEL DOES NOT EXIST YET)
                continue
            if elementDict['datePublished'] is None:
                # `datePublished` IS A NOT NULL COLUMN SO THE ROW CANNOT BE
                # INSERTED
                self.log.warning(
                    "could not find a publication date in %s - skipping" % (atel,))
                continue

            convert_dictionary_to_mysql_table(
                dbConn=self.dbConn,
                log=self.log,
                dictionary=elementDict,
                dbTableName="atel_fullcontent",
                uniqueKeyList=["atelNumber"],
                dateModified=False,
                returnInsertOnly=False,
                replace=False,
                batchInserts=False,  # will only return inserts,
                reDatetime=self._RE_SQL_DATETIME  # OR FALSE
            )

        self.log.debug('completed the ``atels_to_database`` method')
        return None

    @staticmethod
    def _compile_coordinate_regexes():
        """*Build the three verbose-mode regexes used to find coordinates.*

        **Return**

        - ``reSexeg``, ``reDegree``, ``reSexTable`` -- compiled regexes matching sexegesimal coordinates in text, decimal-degree coordinates in text and sexegesimal coordinates in tables
        """
        start = r"""((R\.?A\.?\b|Coord)[/()\w\d\s,.]{0,9}(\(J2000(\.0)?\)\s?)?(=|:|\s)|\d{4}-\d{2}-\d{2})\s{0,2}[+(]{0,2}"""
        middle = r"""(\sdeg)?(\s?,|:)?\s{0,2}(and\s{1,2}|\(?[\ddeg.':\s]{1,16}\)?(;|,)?\s{0,3})?(Decl?\.?\s*?[()\w\d\s]{0,9}(=|:|\s))?\s?"""
        end = r"""(\sdeg)?"""
        raSex = r"""(?P<raSex>(
            (?P<raHrs>\d|[0-1]\d|[2][0-3])(:\s?|\s|h\s?)
            (?P<raMin>[0-5][0-9])(:\s?|\s|m\s?)
            (?P<raSec>[0-5]\d|\d(?!\d))s?(?P<raSubSec>\.\d{1,})?(\s|\s?s)?
            )
        )"""
        decSex = r"""(?P<decSex>(
            (?P<decDeg>(\+|-|–)?[0-8]\d)(:\s?|\s|d\s?|deg\s|o\s?)
            (?P<decMin>[0-5][0-9])(:\s?|\s|m\s?|'?\s?)
            (?P<decSec>[0-5]?\d)'?\s?(?P<decSubSec>\.\d{1,3})?'?s?
            )
        )"""
        raDeg = r"""
            (?P<raDDeg>\d{1,3}(\.\d{1,}))
        """
        decDeg = r"""
            (?P<decDDeg>[\+\-\–]?\d{1,3}(\.\d{1,}))
        """

        # REGEX TO SEARCH FOR SEXEGESIMAL COORDINATES WITHIN THE BODY TEXT
        reSexeg = r"""
            %s
            %s
            %s
            %s
            %s
        """ % (start, raSex, middle, decSex, end)
        reSexeg = re.compile(r"""%s""" % (reSexeg), re.S | re.I | re.X)

        # REGEX TO SEARCH FOR DECIMAL DEGREES COORDINATES WITHIN THE BODY TEXT
        reDegree = r"""
            %s
            %s
            (\sdeg)?(\s?,|:)?\s{0,2}(and\s{1,2}|\(?%s\)?(;|,)?\s{0,3})?(Decl?\.?\s*?[()\w\d\s]{0,9}(=|:|\s))?\s?
            %s
            %s""" % (start, raDeg, raSex, decDeg, end,)
        reDegree = re.compile(r"""%s""" % (reDegree,), re.S | re.I | re.X)

        # REGEX TO SEARCH FOR SEXEG COORDINATES IN TABLES
        reSexTable = r"""
            %s
            \s?(\||</td>\s?<td>)?\s?
            %s
        """ % (raSex, decSex,)
        reSexTable = re.compile(
            r"""%s""" % (reSexTable, ), re.S | re.I | re.X)

        return reSexeg, reDegree, reSexTable

    @staticmethod
    def _clean_user_text(userText):
        """*Strip HTML residue and normalise punctuation in an ATel body.*

        **Key Arguments**

        - ``userText`` -- the ATel body text (string)

        **Return**

        - ``userText`` -- the cleaned text
        """
        for item in mysql._STRINGS_TO_REMOVE:
            userText = userText.replace(item, "")

        # COLLAPSE RUNS OF SPACES. THE SOURCE SHOWED IDENTICAL SEARCH AND
        # REPLACEMENT STRINGS HERE (A NO-OP); REPEATING THE REPLACE SIX TIMES
        # ONLY MAKES SENSE FOR DOUBLE -> SINGLE SPACE
        for i in range(0, 6):
            userText = userText.replace("  ", " ")
        userText = userText.replace(";", ":")
        userText = userText.replace("±: 0.001", "")
        return userText

    @staticmethod
    def _sexegesimal_match_to_degrees(converter, item):
        """*Convert one sexegesimal regex match into a decimal-degree pair.*

        **Key Arguments**

        - ``converter`` -- the astrocalc ``unit_conversion`` object
        - ``item`` -- regex match holding the ``raHrs``/``raMin``/``raSec``/``raSubSec`` and ``decDeg``/``decMin``/``decSec``/``decSubSec`` groups

        **Return**

        - ``[ra, dec]`` -- decimal degrees, both as strings
        """
        raSec = item.group('raSec')
        if item.group('raSubSec'):
            raSec += item.group('raSubSec')
        decSec = item.group('decSec')
        if item.group('decSubSec'):
            decSec += item.group('decSubSec')
        _raSex = """%s:%s:%s""" % (
            item.group('raHrs'), item.group('raMin'), raSec)
        # THE REGEX ACCEPTS AN EN-DASH AS A MINUS SIGN; NORMALISE IT TO A
        # HYPHEN BEFORE CONVERSION (presumably the converter only expects
        # ASCII signs -- verify against astrocalc)
        _decSex = """%s:%s:%s""" % (
            item.group('decDeg').replace(u"–", "-"), item.group('decMin'), decSec)

        raDegrees = converter.ra_sexegesimal_to_decimal(
            ra=_raSex
        )
        decDegrees = converter.dec_sexegesimal_to_decimal(
            dec=_decSex
        )
        return [str(raDegrees), str(decDegrees)]

    @staticmethod
    def _colour(text, fragment, colour):
        """*Wrap every occurrence of ``fragment`` in ``text`` in a bold, coloured font tag.*"""
        return text.replace(
            fragment, " **<font color=%s>%s </font>** " % (colour, fragment))

    def parse_atels(
            self):
        """*Parse the content of the ATels in the database*

        Appending the various components and values to the db. Also includes the ability convert the atels to markdown, highlighting matches of the parsing regexs.

        For every unparsed row of ``atel_fullcontent`` (or every row if ``reParse`` is set) the ATel type and parse date are written back, and one row per coordinate pair / object name is inserted into ``atel_coordinates`` / ``atel_names``.

        **Usage**

        ```python
        from atelParser import mysql
        parser = mysql(
            log=log,
            settings=settings
        )
        parser.parse_atels()
        ```
        """
        self.log.debug('starting the ``parse_atels`` method')

        keywordFont = r"**<font color=#b58900>\1</font>**"

        # ASTROCALC UNIT CONVERTER OBJECT
        converter = unit_conversion(
            log=self.log
        )

        # SELECT UNPROCESSED ATELS
        if self.reParse:
            whereClause = "1=1"
        else:
            whereClause = "dateParsed is NULL"
        sqlQuery = u"""SELECT *
            FROM atel_fullcontent
            where %(whereClause)s
            ORDER BY atelNumber""" % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn,
            quiet=False
        )

        # REGEX BUILDS
        reSexeg, reDegree, reSexTable = self._compile_coordinate_regexes()
        # REGEX TO SEARCH FOR OBJECT NAMES IN THE ATEL BODIES
        nameStr = ("|").join(self._NAME_PATTERNS)
        reName = re.compile(r"""(%s)""" % (nameStr,), re.S | re.I)
        reSNType = self._RE_SN_TYPE
        reDisc = self._RE_DISC
        reDiscPhrase = self._RE_DISC_PHRASE
        reClass = self._RE_CLASS
        reClass2 = self._RE_CLASS2
        reObs = self._RE_OBS
        reCor = self._RE_COR

        # ITERATE THROUGH THE NEW UNPROCESSED ATELS
        for row in rows:
            atelNumber = row["atelNumber"]
            userText = row["userText"]

            self.log.info("""parsing atel: `%(atelNumber)s`""" % locals())

            # CONVERT BYTES TO UNICODE. THE OLD CHECK TESTED FOR `str` AND
            # THEN CALLED `str(text, encoding=...)`, WHICH ALWAYS FAILED AND
            # WAS SILENTLY SWALLOWED
            if isinstance(userText, bytes):
                userText = userText.decode("utf-8", "replace")
            if userText is None:
                # NO BODY WAS EXTRACTED FOR THIS ATEL; PARSE THE TITLE ONLY
                userText = u""

            # SETUP HEADERS FOR MD -- USED FOR DEBUGGING
            header = "\n# %s: %s" % (row["atelNumber"], row["title"],)
            tags = "\n### **TAGS:** %s" % (row["tags"],)

            # REMOVE NIGGLY STRINGS TO MAKE PARSING EASIER
            userText = self._clean_user_text(userText)

            # SEARCH FOR SEXEGESIMAL COORDINATES WITHIN THE BODY TEXT
            # e.g. 14h 59m 36.51s -71d 46m 60.0s
            sList = []
            for item in reSexeg.finditer(userText):
                sList.append(
                    self._sexegesimal_match_to_degrees(converter, item))
                userText = self._colour(
                    userText, item.group('raSex'), "blue")
                userText = self._colour(
                    userText, item.group('decSex'), "blue")

            # SEARCH FOR DECIMAL DEGREES COORDINATES WITHIN THE BODY TEXT
            for item in reDegree.finditer(userText):
                sList.append([item.group('raDDeg'), item.group('decDDeg')])
                userText = self._colour(
                    userText, item.group('raDDeg'), "green")
                userText = self._colour(
                    userText, item.group('decDDeg'), "green")

            # SEARCH FOR SEXEG COORDINATES IN TABLES
            for item in reSexTable.finditer(userText):
                sList.append(
                    self._sexegesimal_match_to_degrees(converter, item))
                userText = self._colour(
                    userText, item.group('raSex'), "#dc322f")
                userText = self._colour(
                    userText, item.group('decSex'), "#dc322f")

            # SEARCH FOR NAMES IN THE ATEL HEADER AND BODY
            foundNames = set(m.group() for m in reName.finditer(header))
            foundNames.update(m.group() for m in reName.finditer(userText))

            # CLEAN UP THE NAMES BEFORE INGEST (SORTED SO THE INSERT ORDER IS
            # REPRODUCIBLE)
            nList = sorted(
                set(clean_supernova_name(self.log, n) for n in foundNames))

            userText = reName.sub(
                r"**<font color=#2aa198>\1</font>**", userText)
            header = reName.sub(
                r"**<font color=#2aa198>\1</font>**", header)

            # DETERMINE THE ATEL TYPE - DISCOVERY, CLASSIFICATION OR
            # OBSERVATION
            comment = 0
            discHead, obsHead, clasHead, correctionHead = 0, 0, 0, 0
            discText, obsText, clasText = 0, 0, 0

            # SEARCH FOR DISCOVERY KEYWORDS IN HEADER AND TEXT
            # MIGHT AS WELL BE IN THE HEADER - IF reDiscPhrase IS IN THE ATEL,
            # DEFINITELY A DISCOVERY
            if reDiscPhrase.search(userText) or reDisc.search(header):
                discHead = 1
            if reDisc.search(userText):
                discText = 1
            if discHead or discText:
                userText = reDiscPhrase.sub(keywordFont, userText)
                userText = reDisc.sub(keywordFont, userText)
                header = reDisc.sub(keywordFont, header)

            # SEARCH FOR CLASSIFICATION KEYWORDS IN HEADER AND TEXT
            if reClass.search(header):
                clasHead = 1
            if reClass.search(userText):
                clasText = 1
            if reClass2.search(header):
                clasHead = 1
            if clasHead or clasText:
                userText = reClass.sub(keywordFont, userText)
                header = reClass.sub(keywordFont, header)
                header = reClass2.sub(keywordFont, header)

            # SEARCH FOR OBSERVATION KEYWORDS IN HEADER AND TEXT
            if reObs.search(header):
                obsHead = 1
            if reObs.search(userText):
                obsText = 1
            if obsHead or obsText:
                userText = reObs.sub(keywordFont, userText)
                header = reObs.sub(keywordFont, header)

            # SEARCH FOR CORRECTION KEYWORDS IN HEADER AND TEXT
            if reCor.search(userText + header):
                correctionHead = 1
                userText = reCor.sub(keywordFont, userText)
                header = reCor.sub(keywordFont, header)

            if "Comment" in tags:
                comment = 1

            # CREATE AN ATELTYPE TAG -- SIMPLE ROUTINE TO GUESS THE 'TYPE' OF
            # ATEL. HEADER KEYWORDS GET PRIORITY OVER THE BODY TEXT; THE ORDER
            # OF THE THREE BODY-TEXT CHECKS MATTERS AS EACH DEPENDS ON THE
            # FLAGS SET BEFORE IT
            clas, obs, disc, correction = clasHead, obsHead, discHead, correctionHead
            if clasText == 1 and disc == 0 and obs == 0:
                clas = 1
            if obsText == 1 and disc == 0 and clas == 0:
                obs = 1
            if discText == 1 and obs == 0 and clas == 0:
                disc = 1

            atelType = ""
            if comment == 1:
                atelType += " comment "
            if correction == 1:
                atelType += " correction "
            if disc == 1:
                atelType += " discovery "
            if clas == 1:
                atelType += " classification "
            if obs == 1:
                atelType += " observation "

            header = header + atelType

            # IF THE ATEL-TYPE IS CLASSIFICATION THEN LOOK FOR THE
            # CLASSIFICATION
            oneType = None
            if "classification" in atelType:
                SNTypeList = set()
                SNTypeReplace = set()
                # PREVIOUSLY A FAILED SEARCH RAN `SNTypeIter is None` (A
                # COMPARISON, NOT AN ASSIGNMENT) AND THEN ITERATED AN UNBOUND
                # NAME
                for item in reSNType.finditer(header + userText):
                    SNTypeReplace.add(item.group())
                    # ONLY THESE GROUPS HOLD A TYPE; 3 AND 7 HOLD THE WORD
                    # SN/supernova
                    for groupIndex in (1, 2, 4, 5, 6, 8):
                        if item.group(groupIndex):
                            SNTypeList.add(item.group(groupIndex))

                for item in sorted(SNTypeReplace):
                    userText = userText.replace(
                        item, " ***<font color=#859900>" + item + " </font>*** ")
                    header = header.replace(
                        item, " ***<font color=#859900>" + item + " </font>*** ")

                for item in sorted(SNTypeList):
                    header = header + " ***<font color=#859900>" + \
                        item + " </font>*** "

                # PROVIDE THE SINGLE CLASSIFICATION IF THERE IS ONLY ONE GIVEN
                if len(SNTypeList) == 1:
                    oneType = list(SNTypeList)[0]

            if not atelType:
                atelType = "observation"

            dateParsed = times.get_now_sql_datetime()

            # NOTE(review): THE STATEMENTS BELOW ARE BUILT BY STRING
            # FORMATTING. THE INTERPOLATED VALUES COME FROM THE DATABASE ROW
            # OR FROM REGEX MATCHES WITH RESTRICTED CHARACTER SETS, BUT
            # PARAMETERISED QUERIES WOULD BE SAFER IF `writequery` SUPPORTS
            # THEM
            sqlQuery = u"""
                UPDATE atel_fullcontent
                SET atelType = "%s",
                dateParsed = "%s"
                WHERE atelNUmber = %s
            """ % (atelType.strip(), dateParsed, atelNumber,)

            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn,
                Force=False
            )

            isSN = 0
            if "Supernovae" in tags:
                isSN = 1

            singleClassification = oneType

            # CREATE AN ATEL 'NAME' & URL USEFUL FOR INGEST
            atelName = "atel_" + str(atelNumber)
            atelUrl = "http://www.astronomerstelegram.org/?read=" + \
                str(atelNumber)

            survey = "atel-coords"
            for item in sList:
                sqlQuery = """INSERT INTO atel_coordinates (
                                                atelNumber,
                                                atelName,
                                                atelUrl,
                                                survey,
                                                raDeg,
                                                decDeg,
                                                supernovaTag
                                            )
                        VALUES (
                                    %s,
                                    "%s",
                                    "%s",
                                    "%s",
                                    %s,
                                    %s,
                                    %s
                                )""" % (atelNumber, atelName, atelUrl, survey, item[0], item[1], isSN)

                writequery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=self.dbConn
                )

            # THE UPDATE TOUCHES EVERY ROW OF THIS ATEL, SO RUN IT ONCE AFTER
            # THE INSERTS RATHER THAN ONCE PER INSERTED ROW
            if sList and singleClassification is not None:
                sqlQuery = """UPDATE atel_coordinates
                                SET singleClassification = "%s"
                                WHERE atelNumber = %s""" % (singleClassification, atelNumber,)

                writequery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=self.dbConn
                )

            survey = "atel-names"
            for item in nList:
                sqlQuery = """INSERT INTO atel_names (
                                                atelNumber,
                                                atelName,
                                                atelUrl,
                                                survey,
                                                name,
                                                supernovaTag
                                            )
                        VALUES (
                                    %s,
                                    "%s",
                                    "%s",
                                    "%s",
                                    "%s",
                                    %s
                                )""" % (atelNumber, atelName, atelUrl, survey, item, isSN)

                writequery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=self.dbConn
                )

            if nList and singleClassification is not None:
                sqlQuery = """UPDATE atel_names
                                SET singleClassification = "%s"
                                WHERE atelNumber = %s""" % (singleClassification, atelNumber,)

                writequery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=self.dbConn
                )

        self.log.debug('completed the ``parse_atels`` method')
        return None

    def populate_htm_columns(
            self):
        """*populate htm columns in the atel_coordinates table*

        **Usage**

        To add the HTM columns (levels 10, 13 & 16) to the atel_coordinates database add this to your code:

        ```python
        from atelParser import mysql
        parser = mysql(
            log=log,
            settings=settings
        )
        parser.populate_htm_columns()
        ```

        """
        self.log.debug('starting the ``populate_htm_columns`` method')

        # IMPORTED HERE SO HMpTy IS ONLY REQUIRED WHEN THIS METHOD IS USED
        from HMpTy.mysql import add_htm_ids_to_mysql_database_table
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="atel_coordinates",
            dbConn=self.dbConn,
            log=self.log,
            primaryIdColumnName="primaryId",
            dbSettings=self.dbSettings
        )

        self.log.debug('completed the ``populate_htm_columns`` method')
        return None

    def _create_atel_database_tables(
            self):
        """*create the database tables required to host the atel information*

        Creates ``atel_coordinates``, ``atel_fullcontent`` and ``atel_names`` if they do not already exist.
        """
        self.log.debug('starting the ``_create_atel_database_tables`` method')

        # atel_coordinates TABLE
        # NOTE(review): `idx_htm10ID` INDEXES `htm13ID` AND SEVERAL htm
        # INDEXES ARE DUPLICATED. THE DDL IS LEFT UNCHANGED SO NEW INSTALLS
        # MATCH EXISTING DATABASES -- CONFIRM BEFORE TIDYING
        sqlQuery = """CREATE TABLE IF NOT EXISTS `atel_coordinates` (
            `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
            `atelNumber` int(11) NOT NULL,
            `raDeg` double NOT NULL,
            `decDeg` double NOT NULL,
            `crossMatchDate` datetime DEFAULT NULL,
            `singleClassification` varchar(45) DEFAULT NULL,
            `supernovaTag` int(11) DEFAULT NULL,
            `ingested` int(11) DEFAULT '0',
            `atelName` varchar(45) NOT NULL,
            `atelUrl` varchar(200) NOT NULL,
            `htm16ID` bigint(20) DEFAULT NULL,
            `summaryRow` tinyint(4) DEFAULT NULL,
            `survey` varchar(45) NOT NULL,
            `titleToComment` tinyint(4) NOT NULL DEFAULT '0',
            `htm13ID` int(11) DEFAULT NULL,
            `htm10ID` int(11) DEFAULT NULL,
            `updated` tinyint(1) DEFAULT '0',
            `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
            `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (`primaryId`),
            UNIQUE KEY `atelnumber_ra_dec` (`atelNumber`,`raDeg`,`decDeg`),
            KEY `ra_deg` (`raDeg`,`decDeg`),
            KEY `atelNumber` (`atelNumber`),
            KEY `idx_htm16ID` (`htm16ID`),
            KEY `idx_htm10ID` (`htm13ID`),
            KEY `idx_htm13ID` (`htm13ID`),
            KEY `i_htm10ID` (`htm10ID`),
            KEY `i_htm13ID` (`htm13ID`),
            KEY `i_htm16ID` (`htm16ID`)
        ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;"""
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn
        )

        # atel_fullcontent TABLE
        sqlQuery = """CREATE TABLE IF NOT EXISTS `atel_fullcontent` (
            `primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
            `atelNumber` int(11) DEFAULT NULL,
            `authors` mediumtext,
            `backRefList` varchar(2000) DEFAULT NULL,
            `dateCreated` datetime DEFAULT NULL,
            `dateLastModified` datetime DEFAULT NULL,
            `dateLastRead` datetime DEFAULT NULL,
            `email` varchar(450) DEFAULT NULL,
            `refList` varchar(450) DEFAULT NULL,
            `tags` varchar(450) DEFAULT NULL,
            `title` varchar(450) DEFAULT NULL,
            `userText` mediumtext,
            `datePublished` datetime NOT NULL,
            `atelType` varchar(500) DEFAULT NULL,
            `dateParsed` datetime DEFAULT NULL COMMENT 'The date the ATel text was parsed for names and coordinates',
            `updated` tinyint(4) DEFAULT '0',
            PRIMARY KEY (`primaryId`),
            UNIQUE KEY `atelnumber` (`atelNumber`)
        ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
        """
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn
        )

        # atel_names TABLE
        sqlQuery = """CREATE TABLE IF NOT EXISTS `atel_names` (
            `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
            `atelNumber` int(11) NOT NULL,
            `name` varchar(200) NOT NULL,
            `crossMatchDate` datetime DEFAULT NULL,
            `singleClassification` varchar(45) DEFAULT NULL,
            `supernovaTag` int(11) DEFAULT NULL,
            `ingested` int(11) DEFAULT '0',
            `atelName` varchar(45) NOT NULL,
            `atelUrl` varchar(200) NOT NULL,
            `survey` varchar(45) NOT NULL,
            `titleToComment` tinyint(4) NOT NULL DEFAULT '0',
            `summaryRow` tinyint(4) DEFAULT NULL,
            PRIMARY KEY (`primaryId`),
            UNIQUE KEY `atelnumber_name` (`atelNumber`,`name`),
            KEY `atelNumber` (`atelNumber`),
            KEY `name` (`name`)
        ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
        """
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn
        )

        self.log.debug('completed the ``_create_atel_database_tables`` method')
        return None
def clean_supernova_name(log, snName):
    """
    *Clean a SN name.*

    This function will attempt to clean up the name so that it is somewhat homogeneous with SN/transient from the same survey/atel system.

    **Key Arguments**

    - ``log`` -- logger
    - ``snName`` -- sn name to be cleaned (string)

    **Return**

    - ``snName`` -- cleaned sn name (string)
    """

    # CONVERT BYTES TO UNICODE. THE OLD CHECK TESTED FOR `str` AND THEN
    # CALLED `str(name, encoding=...)`, WHICH ALWAYS FAILED AND WAS SWALLOWED,
    # SO BYTES INPUT CRASHED ON THE FIRST `replace`
    if isinstance(snName, bytes):
        snName = snName.decode("utf-8", "replace")

    snName = snName.replace(" ", "")
    snName = snName.replace(u"–", "-")
    snName = snName.replace("FSRQ", "")
    snName = snName.replace("Catalogue", "-")

    # UPPER-CASE EACH SURVEY KEYWORD INDIVIDUALLY. PREVIOUSLY EVERY MATCH WAS
    # REPLACED BY THE UPPER-CASED *FIRST* MATCH (e.g. sn2012css -> SN2012SN)
    regex = re.compile(r'swift|css|sss|mls|master|^sn', re.I)
    snName = regex.sub(lambda matchObject: matchObject.group().upper(), snName)

    snName = snName.replace("SDSSgalaxy", "SDSS")
    # WAS "MASvTER" -- A STRAY CHARACTER THAT STOPPED THE TWO MASTER
    # NORMALISATION STEPS BELOW FROM APPLYING
    snName = snName.replace('MASTERShort', "MASTER")
    snName = snName.replace('MASTEROT', "MASTER")
    # MASTER NAMES ALWAYS CARRY THE 'J' PREFIX BEFORE THE COORDINATES
    reMaster = re.compile(r'MASTER([^J])')
    snName = reMaster.sub(r'MASTERJ\g<1>', snName)
    regex = re.compile(r'SN.LSQ', re.I)
    snName = regex.sub('LSQ', snName)
    regex = re.compile(r'supernova', re.I)
    snName = regex.sub('SN', snName)
    regex = re.compile(r'GuideStarCatalog', re.I)
    snName = regex.sub('GSC-', snName)
    regex = re.compile(r'sdssgalaxy', re.I)
    snName = regex.sub('SDSS', snName)

    return snName
1187# use the tab-trigger below for new method
1188# xt-class-method