exception handling
Macbull committed May 4, 2016
1 parent f51db35 commit 329bf09
Showing 1 changed file with 22 additions and 13 deletions.
35 changes: 22 additions & 13 deletions scrapping_methods.py
@@ -5,25 +5,34 @@
 
 def get_rss_link(url):
     """Method to scrap RSS Link"""
-    html = urllib2.urlopen(url).read()
-    soup = BeautifulSoup(html)
-    rss_link = soup.find('img','feed-icon').parent['href']
-    return str(rss_link)
+    try:
+        html = urllib2.urlopen(url).read()
+        soup = BeautifulSoup(html)
+        rss_link = soup.find('img','feed-icon').parent['href']
+    except:
+        return ''
+    return str(rss_link)
 
 def get_rtc_links(url):
     """Method to scrap RTC Links"""
-    html = urllib2.urlopen(url).read()
-    soup = BeautifulSoup(html)
     links=[]
-    rtc_links = soup.find_all('a','esc-fullcoverage-button')
-    for link in rtc_links:
-        links.append('http://news.google.com'+str(link['href']))
+    try:
+        html = urllib2.urlopen(url).read()
+        soup = BeautifulSoup(html)
+        rtc_links = soup.find_all('a','esc-fullcoverage-button')
+        for link in rtc_links:
+            links.append('http://news.google.com'+str(link['href']))
+    except:
+        pass
     return links
 
 def get_see_all_link(url):
     """Method to scrap SEE ALL ARTICLES Link"""
-    html = urllib2.urlopen(url).read()
-    soup = BeautifulSoup(html)
-    link = soup.find('a','more-coverage-text')['href']
-    return 'http://news.google.com'+str(link)
+    try:
+        html = urllib2.urlopen(url).read()
+        soup = BeautifulSoup(html)
+        link = soup.find('a','more-coverage-text')['href']
+    except:
+        return ''
+    return 'http://news.google.com'+str(link)
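
With this change each helper degrades gracefully instead of raising: get_rss_link and get_see_all_link return an empty string, and get_rtc_links returns an empty list, when the fetch or parse fails. A minimal usage sketch under that assumption (the URL and the Python 2 print statements below are illustrative only, not part of the commit):

# Sketch: calling the patched scrapers and honouring their fallback values.
# Assumes Python 2 (the module itself uses urllib2); the URL is a placeholder.
from scrapping_methods import get_rss_link, get_rtc_links, get_see_all_link

url = 'http://news.google.com/'  # hypothetical Google News page

rss = get_rss_link(url)
if rss:  # '' signals that the fetch or parse failed
    print 'RSS feed: ' + rss

for link in get_rtc_links(url):  # empty list on failure, so the loop is simply skipped
    print 'RTC link: ' + link

see_all = get_see_all_link(url)
if see_all:  # '' signals that the fetch or parse failed
    print 'See all articles: ' + see_all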
