search.py
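"""Command-line interface for the feedsearch library.

Searches a site for links to feeds and prints the results. Example
invocation (the URL is illustrative):

    python search.py https://example.com --info --verbose
"""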
import logging
import traceback
from pprint import pprint

import click

from feedsearch import search as search_feeds


@click.command()
@click.argument("url")
@click.option(
    "--all/--no-all",
    default=False,
    help="Search all potential locations for feeds. Warning: Slow",
)
@click.option("--info/--no-info", default=False, help="Return additional feed details")
@click.option(
    "--parser",
    default="html.parser",
    type=click.Choice(["html.parser", "lxml", "xml", "html5lib"]),
    help="BeautifulSoup parser ('html.parser', 'lxml', 'xml', or 'html5lib'). Defaults to 'html.parser'",
)
@click.option("-v", "--verbose", is_flag=True, help="Show logging")
@click.option(
    "--exceptions/--no-exceptions",
    default=False,
    help="If False, will gracefully handle Requests exceptions and attempt to keep searching. "
    "If True, will leave Requests exceptions uncaught to be handled externally.",
)
@click.option("--timeout", default=3.05, type=click.FLOAT, help="Request timeout")
@click.option(
    "--favicon/--no-favicon", default=False, help="Convert Favicon into Data Uri"
)
@click.option(
    "--urls/--no-urls",
    default=False,
    help="Return found Feeds as a list of URL strings instead of FeedInfo objects.",
)
@click.option(
    "--cms/--no-cms",
    default=True,
    help="Check default CMS feed location if site is using a known CMS.",
)
@click.option(
    "--discovery/--no-discovery",
    default=False,
    help='Only search for RSS discovery tags (e.g. <link rel="alternate" href=...>).',
)
def search(
    url, all, info, parser, verbose, exceptions, timeout, favicon, urls, cms, discovery
):
    # Optionally attach a DEBUG-level stream handler so feedsearch's
    # internal logging is printed to the console.
    if verbose:
        logger = logging.getLogger("feedsearch")
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s [in %(pathname)s:%(lineno)d]"
        )
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    click.echo("\nSearching URL {0}\n".format(url))
    try:
        # Map the CLI flags onto feedsearch's search() keyword arguments.
        feeds = search_feeds(
            url,
            info=info,
            check_all=all,
            cms=cms,
            discovery_only=discovery,
            favicon_data_uri=favicon,
            as_urls=urls,
            parser=parser,
            exceptions=exceptions,
            timeout=timeout,
        )
        click.echo()
        for feed in feeds:
            if not urls:
                # FeedInfo objects: print all of their attributes.
                pprint(vars(feed))
                click.echo()
            else:
                # Plain URL strings.
                click.echo("{0}".format(feed))
        return feeds
    except Exception as e:
        click.echo("Exception: {0}\n".format(e))
        click.echo(traceback.format_exc())
        return []


if __name__ == "__main__":
    search()
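
# A minimal sketch of calling feedsearch programmatically, using the same
# import and keyword arguments this CLI passes through (URL is illustrative):
#
#     from feedsearch import search
#     feeds = search("https://example.com", info=True)
#     for feed in feeds:
#         pprint(vars(feed))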