-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcreate_voter_spreadsheet_discourse.py
155 lines (110 loc) · 5.61 KB
/
create_voter_spreadsheet_discourse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import math
import requests
import time
from preprocessing import create_voter_spreadsheet
# In Discourse, voters.json takes in a poll and returns a list of users who voted for each option in
# that poll. It returns those users in batches of size NUM_VOTES_PER_API_REQUEST.
# So, if some poll option received NUM_VOTES_PER_API_REQUEST*3 votes, then you would have to query
# voters.json 3 times.
# See https://github.com/discourse/discourse/blob/e0d9232259f6fb0f76bca471c4626178665ca24a/plugins/poll/plugin.rb#L166
NUM_VOTES_PER_API_REQUEST = 50
# how long (in seconds) to wait between API requests (to stay on good terms with the website)
SECONDS_BETWEEN_API_REQUESTS = 3
# The URL from which to request vote info from Discourse
DISCOURSE_VOTES_LOCATION = "https://board.ttvchannel.com/polls/voters.json"
def get_poll_data_from_topic(topic_id, api_headers, verbose=True):
"""
Grab data (poll names, option ids and names, vote counts, etc.) for all of the polls in the
first post of the given topic. Return the JSON represening the first post.
Note this output does not include the actual users who voted in each poll.
"""
topic_url = "https://board.ttvchannel.com/t/" + topic_id + ".json"
if verbose:
print(f"Fetching poll data from {topic_url}....", end="", flush=True)
topic = requests.get(topic_url, headers=api_headers).json()
first_post = topic["post_stream"]["posts"][0]
if verbose:
print(" done." )
print(
f"Found {len(first_post['polls'])} polls"
f" in the first post of topic {topic_id} ({topic['title']})."
)
return first_post
def get_voter_dictionary(post, api_headers, verbose=True):
"""
Grab the voter data from all of the given poll data (produced by get_poll_data_from_topic).
Return the data as a dictionary of user votes keyed by username.
Each vote is a dictionary of rankings (numbers) keyed by entry name.
So, the voter dictionary follows this format:
votes[username][entry_name] = ranking
NOTE: This method assumes that each poll corresponds to a particular ranking and that entries
are listed in the same order across all polls.
NOTE: This method assumes that the poll for rank i is titled poll_i.
"""
if verbose:
print("Fetching and processing vote data...")
votes = {}
for poll_index, poll in enumerate(post["polls"]):
# In the i-th poll (indexed from 1), users vote for which entry should get ranking i.
# For instance, in the 3st poll users assign ranking 3 to an entry (users pick their
# 3rd-favorite entry).
# This assumes that the poll for ranking i has its name attribute set to poll_i.
ranking = int(poll["name"][5:])
# In each poll, users select one entry. Each option in the poll corresponds to a particular
# entry.
# entry_names_keyed_by_id contains the names of the contest entries, each keyed by the id of
# their option in this particular poll.
entry_names_keyed_by_id = {}
for option in poll["options"]:
entry_names_keyed_by_id[option["id"]] = option["html"]
# Grab the users who voted for each option, in batches of size NUM_VOTES_PER_API_REQUEST
# at a time (that's the upper limit batch size enforced by the API).
# the most votes that any of the poll's options received
max_num_votes_to_collect = max(option["votes"] for option in poll["options"])
# the amount of API requests it takes to gather max_num_votes_to_collect votes in batches of
# size NUM_VOTES_PER_API_REQUEST
num_api_requests = math.ceil(max_num_votes_to_collect / NUM_VOTES_PER_API_REQUEST)
if verbose:
print(
f"\tPoll {poll['name']} (poll {poll_index + 1}/{len(post['polls'])},"
f" where users vote for ranking {ranking})"
)
for i in range(num_api_requests):
params = {
"post_id": post["id"],
"poll_name": poll["name"],
"limit": NUM_VOTES_PER_API_REQUEST,
"page": i + 1
}
if verbose:
print(f"\t\tFetching vote data from {DISCOURSE_VOTES_LOCATION}...",
end="", flush=True)
vote_batch = requests.get(DISCOURSE_VOTES_LOCATION,
headers=api_headers, params=params).json()["voters"]
if verbose:
print(" processing data...", end="", flush=True)
for option_id, users in vote_batch.items():
entry_name = entry_names_keyed_by_id[option_id]
for user in users:
username = user["username"]
if username not in votes:
votes[username] = {}
votes[username][entry_name] = ranking
if verbose:
print(" done.")
time.sleep(SECONDS_BETWEEN_API_REQUESTS)
if verbose:
print("Done fetching and processing vote data.")
return votes
def main():
with open("api_credentials.txt") as token_file:
api_key, api_username = token_file.read().split()
api_headers = {"Api-Key": api_key, "Api_Username": api_username}
topic_id = input("Enter the topic's id: ")
output_spreadsheet_file_name = f"raw_vote_data_topic_{topic_id}.csv"
post = get_poll_data_from_topic(topic_id, api_headers)
votes = get_voter_dictionary(post, api_headers)
entry_names = [option["html"] for option in post["polls"][0]["options"]]
create_voter_spreadsheet(votes, entry_names, output_spreadsheet_file_name)
if __name__ == "__main__":
main()