From 329bf098833ca57a7d6ddf5b1125b5523baeda23 Mon Sep 17 00:00:00 2001
From: Vishwajeet Narwal
Date: Wed, 4 May 2016 17:49:50 +0530
Subject: [PATCH] exception handling

---
 scrapping_methods.py | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/scrapping_methods.py b/scrapping_methods.py
index ff10600..d959279 100644
--- a/scrapping_methods.py
+++ b/scrapping_methods.py
@@ -5,25 +5,34 @@
 
 def get_rss_link(url):
     """Method to scrap RSS Link"""
-    html = urllib2.urlopen(url).read()
-    soup = BeautifulSoup(html)
-    rss_link = soup.find('img','feed-icon').parent['href']
-    return str(rss_link)
+    try:
+        html = urllib2.urlopen(url).read()
+        soup = BeautifulSoup(html)
+        rss_link = soup.find('img','feed-icon').parent['href']
+    except Exception:
+        return ''
+    return str(rss_link)
 
 def get_rtc_links(url):
     """Method to scrap RTC Links"""
-    html = urllib2.urlopen(url).read()
-    soup = BeautifulSoup(html)
     links=[]
-    rtc_links = soup.find_all('a','esc-fullcoverage-button')
-    for link in rtc_links:
-        links.append('http://news.google.com'+str(link['href']))
+    try:
+        html = urllib2.urlopen(url).read()
+        soup = BeautifulSoup(html)
+        rtc_links = soup.find_all('a','esc-fullcoverage-button')
+        for link in rtc_links:
+            links.append('http://news.google.com'+str(link['href']))
+    except Exception:
+        pass
     return links
 
 def get_see_all_link(url):
     """Method to scrap SEE ALL ARTICLES Link"""
-    html = urllib2.urlopen(url).read()
-    soup = BeautifulSoup(html)
-    link = soup.find('a','more-coverage-text')['href']
-    return 'http://news.google.com'+str(link)
+    try:
+        html = urllib2.urlopen(url).read()
+        soup = BeautifulSoup(html)
+        link = soup.find('a','more-coverage-text')['href']
+    except Exception:
+        return ''
+    return 'http://news.google.com'+str(link)
 