-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqiubai.py
47 lines (40 loc) · 1.07 KB
/
qiubai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
import urllib
import urllib2
from bs4 import BeautifulSoup
def print_qiushi(item):
    """Print the author and the text of one post, skipping media posts.

    Args:
        item: An element exposing ``find(name, css_class)``; the script
            passes the elements returned by ``soup.findAll`` (presumably
            BeautifulSoup tags, one per post).

    Returns:
        None. Two lines are written to stdout: the stripped text of the
        post's ``h2`` element (or ``None`` when there is no ``h2``), then
        the stripped text of its ``div.content`` element. Nothing is
        printed when the post contains a ``div.thumb`` or
        ``div.video_holder`` element, or when it has no ``div.content``.
    """
    # Posts that carry an image thumbnail or a video are not text-only.
    if item.find('div', 'thumb') or item.find('div', 'video_holder'):
        return

    author = item.find("h2")
    if author is not None:
        author = author.get_text().strip()

    content = item.find("div", 'content')
    if content is None:
        # Without this guard a post lacking a content div would raise
        # AttributeError on None.get_text() and abort the whole run.
        return

    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print(author)
    print(content.get_text().strip())
# Fetch the first page of the "hot" listing and print every text-only post.
page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
# The request is sent with an explicit browser-style User-Agent header.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
try:
    req = urllib2.Request(url, headers=headers)
    res = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(res.read(), "lxml")
    finally:
        # Release the connection even if reading or parsing fails.
        res.close()
    items = soup.findAll('div', 'article block untagged mb15')
    for item in items:
        print_qiushi(item)
except urllib2.URLError as e:
    # Report whichever of the status code / failure reason the error has.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
#print soup.title
#print soup.title.name
#print soup.title.string
#print soup.p
#print soup.a
#string=soup.findAll('div','article')
#sp2=BeautifulSoup(string[0])
#sp=sp2.span.string
#print sp