generated from rafsaf/minimal-fastapi-postgres-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
help_functions_arxiv.py
59 lines (41 loc) · 1.82 KB
/
help_functions_arxiv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
In this file, we collect code snippets that show how to access all the information from the arxiv.org API
that is requested in the coding challenge.
"""
import feedparser
import requests
# Search expression sent to arxiv.org as `search_query`. It is inserted into
# the URL verbatim, so values containing reserved characters such as "&" or
# "#" must be percent-encoded by whoever changes this constant.
YOUR_CUSTOM_QUERY = "au:Einstein"

# Compose the URL for the arxiv.org API. The paging offset of the API is
# called `start` (used together with `max_results`); an unknown parameter
# such as `skip` is not part of the query interface.
url = (
    "https://export.arxiv.org/api/query"
    f"?search_query={YOUR_CUSTOM_QUERY}"
    "&start=0&max_results=1&sortBy=relevance&sortOrder=descending"
)

# Query arxiv.org with the URL. TLS certificate verification is left at its
# default (enabled) so the response cannot be silently intercepted, and a
# timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)

# Register the OpenSearch namespace under the prefix 'opensearch' so that
# feedparser exposes e.g. <opensearch:totalResults> as 'opensearch_totalresults'.
feedparser.mixin._FeedParserMixin.namespaces["http://a9.com/-/spec/opensearch/1.1/"] = "opensearch"
# Register the arXiv Atom namespace under the prefix 'arxiv' so that
# arXiv-specific elements are exposed as 'arxiv_<element>' keys.
# NOTE(review): `_FeedParserMixin` is a private feedparser attribute; confirm
# it still exists when upgrading feedparser.
feedparser.mixin._FeedParserMixin.namespaces["http://arxiv.org/schemas/atom"] = "arxiv"

# Parse the raw response body (both namespaces must be registered before this call).
feed = feedparser.parse(response.content)

# Feed-level metadata.
query = feed.get("feed").get("title")  # the query as echoed in the feed title
total_results = feed.get("feed").get("opensearch_totalresults")  # total number of results

# HTTP-level metadata.
status = response.status_code  # response status code
query_timestamp_str = response.headers["Date"]  # time of the query, as sent by the server

# All result entries of the feed.
entries = feed.entries
feed_entry_all = feed.entries

# First entry of the feed (with max_results=1 it is the only one). A query
# without any match yields an empty list, so fall back to an empty mapping
# instead of raising IndexError; the lookups below then return None / "".
feed_entry_i = entries[0] if entries else {}

# Author names of the entry (returned as list); entries without authors and
# authors without a name are skipped so that the join below cannot fail.
list_of_authors = [
    author.get("name")
    for author in feed_entry_i.get("authors", [])
    if author.get("name")
]
authors = ", ".join(list_of_authors)

# Title of the entry (None if there is no entry).
title = feed_entry_i.get("title")
# Journal reference of the entry (None if the entry does not carry one).
journal = feed_entry_i.get("arxiv_journal_ref")

# Print the results.
print(f"Query: {query}")
print(f"feed: {feed}")