From e87ab6e7e46d8eea10c6bf8776dfdca4cd010293 Mon Sep 17 00:00:00 2001
From: Eric Rasche
Date: Mon, 14 Sep 2015 21:14:55 -0500
Subject: [PATCH] Initial release

---
 .gitignore       |  63 ++++++++++
 README.md        |  53 +++++++++
 conf.yaml        |  64 ++++++++++
 process.py       | 297 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   2 +
 test.py          | 212 +++++++++++++++++++++++++++++++++
 6 files changed, 691 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 conf.yaml
 create mode 100644 process.py
 create mode 100644 requirements.txt
 create mode 100644 test.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7911a2b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,63 @@
# Created by https://www.gitignore.io/api/python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

cache.sqlite

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..309ca25
--- /dev/null
+++ b/README.md
@@ -0,0 +1,53 @@
# Proper Prior PR Planning

A simple bot that comments on GitHub pull requests to help guide repository
maintenance. This includes things like ensuring that enough members of the core
team +1 a pull request, and otherwise helping to ensure that procedures are
consistently followed when merging PRs.

## How it works

- run the script as regularly as seems reasonable (and stays within your GH API
  limits)
- the script fetches all pull requests for a repository
- this data is compared against a local SQLite cache of previously seen PRs
- those that have changed since the last run are investigated individually
- various filters are applied to the PR, with user-defined actions executed
  if the PR matches a filter (an example filter is sketched below)
- the database is updated
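
Filters pair a list of conditions with a list of actions. As a minimal
sketch, condensed from the sample `conf.yaml` shipped in this patch (the
name and thresholds here are illustrative):

```yaml
filters:
  -
    name: Check PRs to dev for mergeability
    conditions:
      - state: 'open'
      - plus__ge: 1      # at least one :+1: vote (emoji) from an approver
      - to_branch: dev
    actions:
      -
        action: comment
        comment: "{author}'s PR has reached the required number of votes for merging. Does anyone wish to do this?"
```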

## Example Run

On our first run, we watch the bot find a PR (#1) and evaluate a number of
conditions. It finds that the PR is open, that its title doesn't contain
`[PROCEDURES]`, and that it has one or more `:+1:` votes (the emoji is
required).

```log
(env)hxr@leda:~/work/p4$ python process.py
INFO:root:Registered PullRequestFilter Check PRs to dev for mergeability
INFO:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com
DEBUG:requests.packages.urllib3.connectionpool:"GET /repos/erasche/gx-package-caching/pulls?per_page=100&state=open&page=1 HTTP/1.1" 200 None
INFO:root:Built PullRequest #1 Testing change for p4
DEBUG:root:Found 1 PRs to examine
DEBUG:root:[Check PRs to dev for mergeability] Evaluating state open for <#1 "Testing change for p4" by @erasche>
DEBUG:root:[Check PRs to dev for mergeability] Evaluating title_contains__not [PROCEDURES] for <#1 "Testing change for p4" by @erasche>
DEBUG:root:[Check PRs to dev for mergeability] Evaluating plus__ge 1 for <#1 "Testing change for p4" by @erasche>
INFO:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com
DEBUG:requests.packages.urllib3.connectionpool:"GET /repos/erasche/gx-package-caching/issues/1/comments?per_page=100&page=1 HTTP/1.1" 200 None
DEBUG:root:[Check PRs to dev for mergeability] Evaluating to_branch master for <#1 "Testing change for p4" by @erasche>
INFO:root:Matched Check PRs to dev for mergeability
INFO:root:Executing action
```

The second time around, the PR's state is already stored in the database, so
we find no PRs that need comments. (The bare timestamps are a debugging print
of the cached and current `updated_at` values.)

```log
(env)hxr@leda:~/work/p4$ python process.py
INFO:root:Registered PullRequestFilter Check PRs to dev for mergeability
INFO:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com
DEBUG:requests.packages.urllib3.connectionpool:"GET /repos/erasche/gx-package-caching/pulls?per_page=100&state=open&page=1 HTTP/1.1" 200 None
2015-09-15 02:07:00 2015-09-15 02:07:00
DEBUG:root:Found 0 PRs to examine
```

diff --git a/conf.yaml b/conf.yaml
new file mode 100644
index 0000000..5de58b2
--- /dev/null
+++ b/conf.yaml
@@ -0,0 +1,64 @@
meta:
  database_path: ./cache.sqlite
  bot_user: erasche

repository:
  owner: galaxyproject
  name: galaxy
  pr_approvers:
    - martenson
    - afgane
    - bgruening
    - blankenberg
    - carlfeberhard
    - dannon
    - davebx
    - erasche
    - guerler
    - jennaj
    - jgoecks
    - jmchilton
    - jxtx
    - natefoo
    - nekrut
    - nitesh1989
    - nsoranzo
    - tnabtaf
  filters:
    -
      name: Check Procedures PRs for mergeability
      conditions:
        - state: 'open'
        - title_contains: '[PROCEDURES]'
        - older_than: 168 hours ago
        - plus__gt: 5
        - minus__eq: 0
      actions:
        -
          action: comment
          comment: "{author}'s PR has reached the threshold of 168 hours and quorum of at least 5 binding +1 votes. One of the maintainers is now bound to merge this PR."
        #-
        #  action: merge
        #  comment: "Automatically merged by Galaxy Jenkins Github Bot"
    -
      name: Check PRs to dev for mergeability
      conditions:
        - state: 'open'
        - title_contains__not: '[PROCEDURES]'
        - plus__ge: 1
        - to_branch: dev
      actions:
        -
          action: comment
          comment: "{author}'s PR has reached the required number of votes for merging. Does anyone wish to do this?"
    -
      name: Check bugfix PRs for mergeability
      conditions:
        - state: 'open'
        - title_contains__not: '[PROCEDURES]'
        - plus__ge: 2
        - to_branch__not: dev
      actions:
        -
          action: comment
          comment: "{author}'s PR has reached the required number of votes for merging. Does anyone wish to do this?"
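
The `__`-suffixed condition keys above (`plus__gt`, `minus__eq`,
`title_contains__not`, ...) combine a base check with an operator, split on
the `__` as `PullRequestFilter.evaluate` does in `process.py` below. A
minimal sketch of that dispatch (the helper names here are illustrative
only):

```python
import operator

# Operator suffixes understood for the numeric checks (plus/minus).
OPS = {'gt': operator.gt, 'ge': operator.ge, 'eq': operator.eq,
       'ne': operator.ne, 'lt': operator.lt, 'le': operator.le}


def split_condition(condition_key):
    # "plus__ge" -> ("plus", "ge"); a bare key like "state" has no operator.
    if '__' in condition_key:
        return tuple(condition_key.split('__', 1))
    return (condition_key, None)


# plus__ge: 1 passes once a PR has at least one binding :+1: vote.
key, op = split_condition('plus__ge')
assert (key, op) == ('plus', 'ge')
assert OPS[op](2, 1)  # a PR with two votes satisfies the threshold of one
```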
diff --git a/process.py b/process.py
new file mode 100644
index 0000000..5d9d0bc
--- /dev/null
+++ b/process.py
@@ -0,0 +1,297 @@
import os
import yaml
from pygithub3 import Github
import sqlite3
import datetime
import parsedatetime
import logging
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger()

# Credentials are read from the environment so they stay out of the config.
gh = Github(
    login=os.environ.get('GITHUB_USERNAME', None),
    password=os.environ.get('GITHUB_PASSWORD', None)
)


class PullRequestFilter(object):

    def __init__(self, name, conditions, actions, committer_group=None,
                 repo_owner=None, repo_name=None, bot_user=None):
        # The repository arguments default to None so that filters can be
        # constructed standalone, e.g. in test.py.
        self.name = name
        self.conditions = conditions
        self.actions = actions
        self.committer_group = committer_group or []
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.bot_user = bot_user
        log.info("Registered PullRequestFilter %s", name)

    def condition_it(self):
        # Flatten the list of {key: value} condition dicts into (key, value)
        # pairs.
        for condition_dict in self.conditions:
            for key in condition_dict:
                yield (key, condition_dict[key])

    def apply(self, pr):
        for (condition_key, condition_value) in self.condition_it():
            log.debug("[%s] Evaluating %s %s for %s", self.name,
                      condition_key, condition_value, pr)
            if not self.evaluate(pr, condition_key, condition_value):
                return False

        log.info("Matched %s", self.name)

        # If we've made it this far, we pass ALL conditions
        for action in self.actions:
            self.execute(pr, action)

        return True

    def evaluate(self, pr, condition_key, condition_value):
        # Some conditions contain an additional operation we must respect,
        # e.g. __gt or __eq
        if '__' in condition_key:
            (condition_key, condition_op) = condition_key.split('__', 1)
        else:
            condition_op = None

        func = getattr(self, 'check_' + condition_key)
        result = func(pr, cv=condition_value)

        # There are two types of conditions: text and numeric.
        # Numeric conditions are only appropriate for the following types:
        # 1) plus, 2) minus
        if condition_key in ('plus', 'minus'):
            if condition_op == 'gt':
                return int(result) > int(condition_value)
            elif condition_op == 'ge':
                return int(result) >= int(condition_value)
            elif condition_op == 'eq':
                return int(result) == int(condition_value)
            elif condition_op == 'ne':
                return int(result) != int(condition_value)
            elif condition_op == 'lt':
                return int(result) < int(condition_value)
            elif condition_op == 'le':
                return int(result) <= int(condition_value)
            else:
                raise ValueError(
                    "Numeric condition '%s' requires an operator such as "
                    "__gt or __eq" % condition_key)
        # Then there are the text types. These have generally already been
        # evaluated by the check function; we just return value/!value.
        else:
            if condition_op == 'not':
                return not result
            else:
                return result

    def check_title_contains(self, pr, cv=None):
        return cv in pr.title

    def check_state(self, pr, cv=None):
        return pr.state == cv

    def _find_in_comments(self, comments, text):
        # Yield every comment whose body contains the given text.
        for page in comments:
            for resource in page:
                if text in resource.body:
                    yield resource

    def _fetch_comments(self, pr):
        # Comments are fetched lazily and memoized on the PR object, since
        # several checks and actions need them.
        if getattr(pr, '_comments', None) is None:
            pr._comments = gh.issues.comments.list(
                pr.number, user=self.repo_owner, repo=self.repo_name)
        return pr._comments

    def _count_votes(self, pr, emoji):
        # Only votes from the configured approver group are binding.
        count = 0
        for vote_comment in self._find_in_comments(self._fetch_comments(pr),
                                                   emoji):
            if vote_comment.user.login in self.committer_group:
                count += 1
        return count

    def check_plus(self, pr, cv=None):
        return self._count_votes(pr, ':+1:')

    def check_minus(self, pr, cv=None):
        return self._count_votes(pr, ':-1:')

    def check_to_branch(self, pr, cv=None):
        return pr.resource.base['ref'] == cv

    def check_older_than(self, pr, cv=None):
        # cv is a relative date such as "168 hours ago"; the check passes if
        # the PR was created before that point in time.
        created_at = pr.created_at
        current = datetime.datetime.now()

        calendar = parsedatetime.Calendar()
        current_adjusted, parsed_as = calendar.parseDT(cv, current)

        return (created_at - current_adjusted).total_seconds() < 0

    def execute(self, pr, action):
        if action['action'] != 'comment':
            raise NotImplementedError("Action %s is not available" %
                                      action['action'])

        comment_text = action['comment'].format(
            author='@' + pr.user['login']
        )

        log.info("Executing action")

        # Check if we've made this exact comment before, so we don't comment
        # multiple times and annoy people.
        for possible_bot_comment in self._find_in_comments(
                self._fetch_comments(pr), comment_text):

            if possible_bot_comment.user.login == self.bot_user:
                log.info("Comment action previously applied, not duplicating")
            else:
                log.info("Comment action previously applied, but by a "
                         "different user (%s); not duplicating",
                         possible_bot_comment.user.login)

            return
Strange?") + + return + + # Create the comment + gh.issues.comments.create( + pr.number, + comment_text, + user=self.repo_owner, + repo=self.repo_name, + ) + + return True + + +class PullRequest(object): + + def __init__(self, resource): + self.resource = resource + + for key in ('number', 'title', 'updated_at', 'url', 'user', 'body', + 'created_at', 'state', 'id'): + setattr(self, key, getattr(self.resource, key, None)) + + log.info("Built PullRequest #%s %s", self.number, self.title) + # 'assignee', 'base', 'body', 'closed_at', 'comments_url', + # 'commits_url', 'created_at', 'diff_url', 'head', 'html_url', 'id', + # 'issue_url', 'loads', 'locked', 'merge_commit_sha', 'merged_at', + # 'milestone', 'number', 'patch_url', 'review_comment_url', + # 'review_comments_url', 'state', 'statuses_url', 'title', + # 'updated_at', 'url', 'user' + + def __str__(self): + return '<#%s "%s" by @%s>' % (self.number, self.title, self.user['login']) + + +class MergerBot(object): + + def __init__(self, conf_path): + with open(conf_path, 'r') as handle: + self.config = yaml.load(handle) + + self.create_db(database_name=os.path.abspath( + self.config['meta']['database_path'])) + + self.timefmt = "%Y-%m-%dT%H:%M:%S.Z" + + self.pr_filters = [] + for rule in self.config['repository']['filters']: + prf = PullRequestFilter( + name=rule['name'], + conditions=rule['conditions'], + actions=rule['actions'], + + # ugh + committer_group=self.config['repository']['pr_approvers'], + repo_owner=self.config['repository']['owner'], + repo_name=self.config['repository']['name'], + bot_user=self.config['meta']['bot_user'], + ) + self.pr_filters.append(prf) + + def create_db(self, database_name='cache.sqlite'): + self.conn = sqlite3.connect(database_name) + cursor = self.conn.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS pr_data( + pr_id INTEGER PRIMARY KEY, + updated_at TEXT + ) + """ + ) + + def fetch_pr_from_db(self, id): + cursor = self.conn.cursor() + cursor.execute("""SELECT * FROM pr_data WHERE pr_id == ?""", (str(id), )) + row = cursor.fetchone() + + if row is None: + return row + + pretty_row = ( + row[0], + datetime.datetime.strptime(row[1], self.timefmt) + ) + return pretty_row + + def cache_pr(self, id, updated_at): + cursor = self.conn.cursor() + cursor.execute("""INSERT INTO pr_data VALUES (?, ?)""", + (str(id), updated_at.strftime(self.timefmt))) + self.conn.commit() + + def update_pr(self, id, updated_at): + cursor = self.conn.cursor() + cursor.execute("""UPDATE pr_data SET updated_at = ? where pr_id = ?""", + (updated_at.strftime(self.timefmt), str(id))) + self.conn.commit() + + def get_prs2(self): + results = gh.pull_requests.list( + state='open', + user=self.config['repository']['owner'], + repo=self.config['repository']['name']) + # This will contain a list of all new/updated PRs to filter + changed_prs = [] + # Loop across our GH results + for page in results: + for resource in page: + # Fetch the PR's ID which we use as a key in our db. + cached_pr = self.fetch_pr_from_db(resource.id) + # If it's new, cache it. + if cached_pr is None: + self.cache_pr(resource.id, resource.updated_at) + changed_prs.append(PullRequest(resource)) + else: + # compare updated_at times. 
                    cached_pr_time = cached_pr[1]
                    print cached_pr_time, resource.updated_at
                    if cached_pr_time != resource.updated_at:
                        changed_prs.append(PullRequest(resource))
        return changed_prs

    def run(self):
        changed_prs = self.get_changed_prs()
        # changed_prs = [
        #     PullRequest(gh.pull_requests.get(
        #         1,
        #         user=self.config['repository']['owner'],
        #         repo=self.config['repository']['name']))
        # ]
        log.debug("Found %s PRs to examine", len(changed_prs))
        for changed in changed_prs:
            for pr_filter in self.pr_filters:
                pr_filter.apply(changed)
            # Record the new updated_at only after every filter has run.
            self.update_pr(changed.id, changed.updated_at)


if __name__ == '__main__':
    bot = MergerBot('conf.yaml')
    bot.run()

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..728690d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
pygithub3
pyyaml
parsedatetime
attrdict

diff --git a/test.py b/test.py
new file mode 100644
index 0000000..0ed19bb
--- /dev/null
+++ b/test.py
@@ -0,0 +1,212 @@
# -*- coding: utf-8 -*-
import unittest
from process import PullRequestFilter
import datetime
import parsedatetime
from attrdict import AttrDict


class TestPullRequestFilter(unittest.TestCase):

    def setUp(self):
        pass

    def _get_dt_from_relative(self, relative):
        calendar = parsedatetime.Calendar()
        adj, parsed_as = calendar.parseDT(relative, datetime.datetime.now())
        return adj

    def test_check_older_than(self):
        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            'created_at': self._get_dt_from_relative("1 day ago")
        })

        self.assertFalse(
            prf.check_older_than(
                fakepr,
                cv="7 days ago"
            )
        )

        self.assertTrue(
            prf.check_older_than(
                fakepr,
                cv="tomorrow"
            )
        )

        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            'created_at': self._get_dt_from_relative("10 days ago")
        })

        self.assertTrue(
            prf.check_older_than(
                fakepr,
                cv="7 days ago"
            )
        )

        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            'created_at': self._get_dt_from_relative("1 day ago")
        })

        self.assertTrue(
            prf.check_older_than(
                fakepr,
                cv="today"
            )
        )

        self.assertFalse(
            prf.check_older_than(
                fakepr,
                cv="2 days ago"
            )
        )

    def test_check_to_branch(self):
        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            'resource': {
                'base': {
                    'ref': 'dev'
                }
            }
        })

        self.assertTrue(
            prf.check_to_branch(
                fakepr,
                cv="dev"
            )
        )

        self.assertFalse(
            prf.check_to_branch(
                fakepr,
                cv="notdev"
            )
        )

    def test_check_state(self):
        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            'state': 'open'
        })

        self.assertTrue(
            prf.check_state(
                fakepr,
                cv="open"
            )
        )

        self.assertFalse(
            prf.check_state(
                fakepr,
                cv="closed"
            )
        )

    def test_check_title_contains(self):
        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            u'title': u'[PROCEDURES] Testing…'
        })

        self.assertTrue(
            prf.check_title_contains(
                fakepr,
                cv="[PROCEDURES]"
            )
        )

        self.assertFalse(
            prf.check_title_contains(
                fakepr,
                cv="anything-else"
            )
        )

    def test_pr_evaluate(self):
        prf = PullRequestFilter("test_filter", [], [])
        fakepr = AttrDict({
            u'title': u'[PROCEDURES] Testing…',
            u'state': u'open',
            u'resource': {
                u'base': {
                    u'ref': u'dev'
                }
            },
            u'created_at': self._get_dt_from_relative("1 day ago")
        })

        self.assertTrue(
            prf.evaluate(
                fakepr,
                'older_than__not',
                '3 days ago',
            )
        )

        self.assertTrue(
            prf.evaluate(
                fakepr,
                'older_than',
                'today',
            )
        )

    def test_prf_apply_eval(self):
        prf = PullRequestFilter(
            "test_filter",
            {
                'state': 'open',
                'title_contains': '[PROCEDURES]',
                'title_contains__not': 'Blah',
                'to_branch': 'dev',
                'older_than': '0 days ago',
                'older_than__not': '2 days ago',
            },
            []
        )

        fakepr = AttrDict({
            u'title': u'[PROCEDURES] Testing…',
            u'state': u'open',
            u'resource': {
                u'base': {
                    u'ref': u'dev'
                }
            },
            u'created_at': self._get_dt_from_relative("1 day ago")
        })

        for (condition_key, condition_value) in prf.conditions.items():
            self.assertTrue(prf.evaluate(fakepr, condition_key,
                                         condition_value))

    def test_prf_condition_iterator(self):
        prf = PullRequestFilter(
            "test_filter",
            [
                {'state': 'open', 'title_contains': '[PROCEDURES]'},
                {'to_branch': 'dev', 'older_than': '0 days ago'},
                {'title_contains__not': 'Blah'},
                {'older_than__not': '2 days ago'},
            ],
            []
        )

        self.assertEqual(
            sorted(list(prf.condition_it())),
            sorted([
                ('state', 'open'),
                ('title_contains', '[PROCEDURES]'),
                ('title_contains__not', 'Blah'),
                ('to_branch', 'dev'),
                ('older_than', '0 days ago'),
                ('older_than__not', '2 days ago'),
            ])
        )
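
    def test_find_in_comments(self):
        # A minimal sketch of a test for the comment-scanning helper, using
        # the same AttrDict-based fakes as the tests above; "pages" mimics
        # the paginated comment listing the bot receives from the API.
        prf = PullRequestFilter("test_filter", [], [])
        pages = [[
            AttrDict({u'body': u':+1: looks good',
                      u'user': {u'login': u'alice'}}),
            AttrDict({u'body': u'needs work',
                      u'user': {u'login': u'bob'}}),
        ]]
        hits = list(prf._find_in_comments(pages, ':+1:'))
        self.assertEqual(len(hits), 1)
        self.assertEqual(hits[0].user.login, 'alice')


if __name__ == '__main__':
    unittest.main()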