Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Retrieve an HTML document or file from the web at a given URL* 

5 

6:Author: 

7 David Young 

8""" 

9from __future__ import print_function 

10from future import standard_library 

11standard_library.install_aliases() 

12from builtins import str 

13import sys 

14import os 

15os.environ['TERM'] = 'vt100' 

16from fundamentals import tools 

17 

18 

19def _fetch(url,): 

20 """ 

21 *Retrieve an HTML document or file from the web at a given URL* 

22 

23 **Key Arguments** 

24 

25 

26 - ``url`` -- the URL of the document or file 

27 

28 **Return** 

29 

30 

31 - ``url`` -- the URL of the document or file, or None if an error occured 

32 - ``body`` -- the text content of the HTML document. 

33 """ 

34 import coloredlogs 

35 import logging as log 

36 import socket 

37 from eventlet import Timeout 

38 import urllib 

39 import sys 

40 

41 # TRY AND DOWNLOAD X TIMES BEFORE QUITING 

42 tries = 10 

43 count = 1 

44 downloaded = False 

45 while count < tries and downloaded == False: 

46 try: 

47 log.debug('downloading ' + url.get_full_url()) 

48 body = urllib.request.urlopen(url).read() 

49 downloaded = True 

50 except socket.timeout as e: 

51 print("timeout on URL, trying again") 

52 count += 1 

53 except Exception as e: 

54 if "[Errno 60]" in str(e): 

55 log.warning('timeout on URL, trying again' % locals()) 

56 count += 1 

57 if "Error 502" in str(e): 

58 log.warning('proxy error on URL, trying again' % locals()) 

59 count += 1 

60 else: 

61 log.warning( 

62 "could not download " + url.get_full_url() + " : " + str(e) + "\n") 

63 url = None 

64 body = None 

65 downloaded = True 

66 

67 return url, body