From d9d6bac2bb5ed98bd83614ad6576d89bf24345a4 Mon Sep 17 00:00:00 2001 From: Charles Reid Date: Tue, 11 Sep 2018 13:45:59 -0700 Subject: [PATCH] commit the work in progress so far on converting UA to OOP --- hooks/Readme.md | 20 +- hooks/base_classes.py | 452 ++++++++++++++++++++++++++++++++++++++++ hooks/pr_classes.py | 79 +++++++ hooks/python_classes.py | 101 +++++++++ hooks/simple_classes.py | 23 ++ 5 files changed, 664 insertions(+), 11 deletions(-) create mode 100644 hooks/base_classes.py create mode 100644 hooks/pr_classes.py create mode 100644 hooks/python_classes.py create mode 100644 hooks/simple_classes.py diff --git a/hooks/Readme.md b/hooks/Readme.md index e326e9c..7e7814d 100644 --- a/hooks/Readme.md +++ b/hooks/Readme.md @@ -1,17 +1,15 @@ # Uncle Archie Continuous Integration Hooks -This directory contains scripts that are continous integration -hooks for Uncle Archie. Each file defines a `process_payload()` -that is imported by `../process_payload.py`. +This is the `hooks/` directory, which contains all the hooks +that are run by Uncle Archie to process webhooks. -Every time a webhook is received by Uncle Archie, it calls each -function with the webhook. Each hook script must use the payload -and meta-info provided to it to determine when it should run. +Each time an incoming webhook is received, Uncle Archie creates +one and only one instance of each hook, and is agnostic to the +details of each test. -## TODO +A singleton + factory method is implemented to return one +instance of each hook. This is called by the `process_payload.py` +script (one directory up), which is called by the flask webhook +server. -These hook functions follow certain templates and borrow certain -portions of code heavily from one another. These can be integrated -into objects that define methods to simplify the rote work and -make things re-usable and easier to set up. 
diff --git a/hooks/base_classes.py b/hooks/base_classes.py
new file mode 100644
--- /dev/null
+++ b/hooks/base_classes.py
@@ -0,0 +1,463 @@
+"""
+Uncle Archie: Base Classes
+
+The classes in this file are intended to be used
+as base classes only. None of them implement a
+process_payload() method, which is a virtual
+method that must be defined by a Task class
+to use that Task with Uncle Archie.
+
+Derived classes are in:
+    simple_classes.py
+    python_classes.py
+    pr_classes.py
+"""
+import datetime
+import json
+import logging
+import os
+import subprocess
+import tempfile
+
+# PyGithub: GithubTask uses it to talk to the Github API
+from github import Github, GithubException
+
+
+class UncleArchieTask(object):
+    """
+    Abstract base class. Represents a task that
+    Uncle Archie performs when a webhook is received.
+    (Tasks are usually CI tests.)
+    """
+    DEFAULT_LOG_DIR = "/tmp/archie"
+    DEFAULT_HTDOCS_DIR = "/www/archie.nihdatacommons.us/htdocs/output"
+    DEFAULT_BASE_URL = "https://archie.nihdatacommons.us/output/"
+
+    def __init__(self,**kwargs):
+        """
+        This performs the initialization procedure
+        common to all Uncle Archie tasks:
+        - get name and label
+        - get temporary directory
+        - get log dir/log file
+        - get htdocs output directory
+        - get htdocs output url
+
+        Remember that we run every hook with every payload,
+        so process_payload() is where we decide whether to
+        actually run tests.
+
+        kwargs:
+            name        : Print-friendly name of this task (required)
+            label       : Filename-friendly short label for this task (required)
+            temp_dir    : Directory where the mess will be made and then cleaned up
+                          (default: a new directory from tempfile.mkdtemp)
+            log_dir     : Directory where output logs should go
+            htdocs_dir  : Directory where web-hosted output goes (must exist)
+            base_url    : Base URL for content hosted at htdocs_dir
+
+        Raises:
+            Exception : name or label is missing, a directory could
+                        not be created, or htdocs_dir does not exist
+        """
+        self.dt = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # name and label have no sensible default, so both are required
+        self.name = self._require_kwarg(kwargs,'name')
+        self.label = self._require_kwarg(kwargs,'label')
+
+        msg = "UncleArchieTask: __init__(): Creating new Uncle Archie Task\n"
+        msg += "Name: %s\n"%(self.name)
+        msg += "Label: %s\n"%(self.label)
+        logging.debug(msg)
+
+        # Get the temporary directory
+        self.temp_dir = kwargs.pop('temp_dir',None)
+        if self.temp_dir is None:
+            # mkdtemp() always creates a new, uniquely named directory,
+            # so tasks created in the same second never share one
+            self.temp_dir = tempfile.mkdtemp(prefix='archie_')
+        else:
+            self._make_dir(self.temp_dir,'temp')
+
+        msg = " - Temporary dir: %s"%(self.temp_dir)
+        logging.debug(msg)
+
+        # Get the output log directory
+        self.log_dir = kwargs.pop('log_dir',self.DEFAULT_LOG_DIR)
+        self._make_dir(self.log_dir,'log')
+
+        msg = " - Log dir: %s"%(self.log_dir)
+        logging.debug(msg)
+
+        # Get the htdocs directory
+        self.htdocs_dir = kwargs.pop('htdocs_dir',self.DEFAULT_HTDOCS_DIR)
+
+        # If it doesn't exist, throw a tantrum
+        if not os.path.isdir(self.htdocs_dir):
+            err = "ERROR: UncleArchieTask: __init__(): htdocs_dir kwarg: "
+            err += "Specified htdocs directory \"%s\" "%(self.htdocs_dir)
+            err += "does not exist."
+            logging.error(err)
+            raise Exception(err)
+
+        msg = " - Htdocs dir: %s"%(self.htdocs_dir)
+        logging.debug(msg)
+
+        # Get the base url
+        self.base_url = kwargs.pop('base_url',self.DEFAULT_BASE_URL)
+
+        msg = " - Base url: %s"%(self.base_url)
+        logging.debug(msg)
+
+        # Make log file names -
+        # these log files are the ones
+        # linked to in the final UA report.
+        out_name = self.make_unique_label("stdout")
+        self.out_log = os.path.join(self.log_dir,out_name)
+        self.out = [] # list of strings
+
+        err_name = self.make_unique_label("stderr")
+        self.err_log = os.path.join(self.log_dir,err_name)
+        self.err = [] # list of strings
+
+        payload_name = self.make_unique_label("payload")
+        self.payload_log = os.path.join(self.log_dir,payload_name)
+
+        msg = "UncleArchieTask: __init__(): Log locations have been set:\n"
+        msg += " - Stdout log: %s\n"%(self.out_log)
+        msg += " - Stderr log: %s\n"%(self.err_log)
+        msg += " - Payload log: %s\n"%(self.payload_log)
+        logging.debug(msg)
+
+        msg = "UncleArchieTask: __init__(): Success!"
+        logging.debug(msg)
+
+
+    @staticmethod
+    def _require_kwarg(kwargs,key):
+        """
+        Pop a mandatory keyword argument out of kwargs.
+        Logs and raises Exception if it was not provided.
+        """
+        if key not in kwargs:
+            err = "ERROR: UncleArchieTask: __init__(): %s kwarg: "%(key)
+            err += "No %s kwarg was provided to the constructor."%(key)
+            logging.error(err)
+            raise Exception(err)
+        return kwargs.pop(key)
+
+
+    @staticmethod
+    def _make_dir(path,kind):
+        """
+        Create directory path (and missing parents) if it
+        does not exist yet. kind ('temp' or 'log') is only
+        used to word the error message.
+        Logs and raises Exception if creation fails.
+        """
+        try:
+            os.makedirs(path,exist_ok=True)
+        except OSError as e:
+            err = "ERROR: UncleArchieTask: __init__(): %s_dir kwarg: "%(kind)
+            err += "Could not create %s dir %s"%(kind,path)
+            logging.error(err)
+            raise Exception(err) from e
+
+
+    def make_unique_label(self, label):
+        """
+        Given a label, make it unique with self.dt.
+        Useful for getting consistent filenames
+        for output files.
+        """
+        return "%s_%s"%(label,self.dt)
+
+
+    def run_cmd(self, cmd, descr, cwd, **kwargs):
+        """
+        Run a command, wait for it to finish, and append
+        its output to self.out and self.err.
+
+        Params:
+            cmd     : (list) the command to run
+            descr   : (string) short description
+            cwd     : (string) curr working dir
+
+        kwargs:
+            None
+
+        Returns:
+            abort   : (boolean) did the process encounter
+                      errors or exceptions (True if the output
+                      mentions "error"/"exception" or the
+                      exit status is nonzero)
+        """
+        cmd_str = " ".join(cmd)
+
+        msg = "UncleArchieTask: run_cmd(): About to run command (%s):\n"%(descr)
+        msg += "    %s\n"%(cmd_str)
+        logging.debug(msg)
+
+        proc = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            cwd=cwd
+        )
+
+        # communicate() drains both pipes at once; reading stdout to the
+        # end before touching stderr can deadlock if stderr fills up
+        o, e = proc.communicate()
+        o = o.decode('utf-8',errors='replace')
+        e = e.decode('utf-8',errors='replace')
+
+        bar = "=====================================\n"
+        for stream, text, log in (("STDOUT",o,self.out),("STDERR",e,self.err)):
+            log.extend([
+                bar,
+                "======= CMD: %s\n"%(cmd_str),
+                "======= %s\n"%(stream),
+                bar,
+                text,
+                "\n\n"
+            ])
+
+        msg = "UncleArchieTask: run_cmd(): Finished running command"
+        logging.debug(msg)
+
+        lowered = (o.lower(),e.lower())
+
+        if any("exception" in text for text in lowered):
+            err = " [X] ERROR: UncleArchieTask: run_cmd(): Detected exception [X]"
+            logging.error(err)
+            return True
+
+        if any("error" in text for text in lowered):
+            err = " [X] ERROR: UncleArchieTask: run_cmd(): Detected error [X]"
+            logging.error(err)
+            return True
+
+        if proc.returncode != 0:
+            err = " [X] ERROR: UncleArchieTask: run_cmd(): "
+            err += "Command exited with status %d [X]"%(proc.returncode)
+            logging.error(err)
+            return True
+
+        return False
+
+
+    def save_payload(self,payload):
+        """
+        Save the webhook payload to a file
+        (self.payload_log) as indented JSON
+        """
+        with open(self.payload_log,'w') as f:
+            f.write(json.dumps(payload, indent=4))
+        msg = "UncleArchieTask: save_payload(): Finished saving payload to file %s"%(self.payload_log)
+        logging.debug(msg)
+
+
+    def process_payload(self,payload):
+        """
+        Virtual method: process the webhook payload
+        """
+        err = "ERROR: UncleArchieTask: process_payload(): "
+        err += "This is a virtual method and must be overridden "
+        err += "by a child class."
+        logging.error(err)
+        raise NotImplementedError(err)
+
+
+class GithubTask(UncleArchieTask):
+    """
+    Base class for a Github test.
+    """
+    def __init__(self,**kwargs):
+        """
+        This performs the initialization procedure
+        for Github-related Uncle Archie tasks.
+
+        Remember, we run every hook with every payload,
+        so process_payload() is where we have to decide
+        whether to run this test (i.e. check if the repo in
+        this payload is on the whitelist).
+
+        kwargs:
+            github_access_token : (string) API access token
+            repo_whitelist      : (list) whitelisted Github repositories
+
+        All remaining kwargs are handed to UncleArchieTask.
+        """
+        # Get API key and save it
+        if 'github_access_token' in kwargs:
+            self.token = kwargs.pop('github_access_token')
+        else:
+            err = "ERROR: GithubTask: __init__(): kwarg github_access_token: "
+            err += "No Github API access token defined with 'github_access_token' kwarg"
+            logging.error(err)
+            raise Exception(err)
+
+        # Get repo whitelist and save it
+        if 'repo_whitelist' in kwargs:
+            self.repo_whitelist = kwargs.pop('repo_whitelist')
+        else:
+            err = "ERROR: GithubTask: __init__(): kwarg repo_whitelist: "
+            err += "No Github whitelist defined with 'repo_whitelist' kwarg"
+            logging.error(err)
+            raise Exception(err)
+
+        # Set up name, label, directories and log files
+        super().__init__(**kwargs)
+
+
+    def get_api_instance(self):
+        """
+        Return a Github API instance (PyGithub object)
+        """
+        return Github(self.token)
+
+
+    @staticmethod
+    def _get_repo_field(payload,key):
+        """
+        Return payload['repository'][key], or None
+        if the payload has no such entry
+        """
+        return payload.get('repository',{}).get(key)
+
+
+    def get_clone_url(self,payload):
+        """
+        String: get a clone-able Github URL
+        for the repository in this payload
+        """
+        return self._get_repo_field(payload,'clone_url')
+
+
+    def get_ssh_url(self,payload):
+        """
+        String: get a clone-able SSH Github URL
+        for the repository in this payload
+        """
+        return self._get_repo_field(payload,'ssh_url')
+
+
+    def get_html_url(self,payload):
+        """
+        String: get an HTML url to the Github repo
+        for the repository in this payload
+        """
+        return self._get_repo_field(payload,'html_url')
+
+
+    def get_full_repo_name(self,payload):
+        """
+        String: full repo name: organization/reponame
+        """
+        return self._get_repo_field(payload,'full_name')
+
+
+    def get_short_repo_name(self,payload):
+        """
+        String: short repo name
+        """
+        return self._get_repo_field(payload,'name')
+
+
+    def get_pull_request_head(self,payload):
+        """
+        String: head commit of this pull request
+        (None if the payload is not a pull request)
+        """
+        if self.is_pull_request(payload):
+            return payload['pull_request']['head']['sha']
+        return None
+
+
+    def get_pull_request_number(self,payload):
+        """
+        Get id number of pull request
+        (None if the payload is not a pull request)
+        """
+        if self.is_pull_request(payload):
+            return payload['number']
+        return None
+
+
+    def is_pull_request(self,payload):
+        """
+        Boolean: is this webhook a PR?
+        """
+        return 'pull_request' in payload
+
+
+    def is_pull_request_open(self,payload):
+        """
+        Boolean: is this webhook opening a PR?
+        (Only looks at the action, so check
+        is_pull_request() first.)
+        """
+        return payload.get('action')=='opened'
+
+
+    def is_pull_request_sync(self,payload):
+        """
+        Boolean: is this webhook syncing a PR?
+        (Only looks at the action, so check
+        is_pull_request() first.)
+        """
+        return payload.get('action')=='synchronize'
+
+
+    def is_pull_request_close(self,payload):
+        """
+        Boolean: is this webhook closing a PR?
+        (Only looks at the action, so check
+        is_pull_request() first.)
+        """
+        return payload.get('action')=='closed'
+
+
+    def is_pull_request_merge_commit(self,payload):
+        """
+        Boolean: does this webhook have a PR merge commit?
+        """
+        if self.is_pull_request(payload):
+            # NOTE(review): this only checks that the key is present,
+            # not that it holds a commit sha - confirm that is intended
+            return 'merge_commit_sha' in payload['pull_request']
+        return False
+
+
+    def set_commit_status(
+            self,
+            full_repo_name,
+            head_commit,
+            state,
+            build_msg,
+            task_name,
+            url = None
+        ):
+        """
+        Set the status of commit (head_commit)
+        in repo (full_repo_name) to (state)
+        with description (build_msg),
+        context (task_name),
+        and optional link (url).
+
+        Github API failures are logged, not raised.
+        """
+        try:
+            g = self.get_api_instance()
+            r = g.get_repo(full_repo_name)
+            c = r.get_commit(head_commit)
+
+            # PyGithub rejects target_url=None, so only
+            # pass a target_url when we were given one
+            status = dict(state=state, description=build_msg, context=task_name)
+            if url is not None:
+                status['target_url'] = url
+            c.create_status(**status)
+        except GithubException:
+            logging.exception("ERROR: Github API: Set commit status for %s failed to update."%(head_commit))
diff --git a/hooks/pr_classes.py b/hooks/pr_classes.py
new file mode 100644
--- /dev/null
+++ b/hooks/pr_classes.py
@@ -0,0 +1,127 @@
+"""
+Uncle Archie: Pull Request Test Classes
+
+Base class for tasks that run a test against
+the head commit of a pull request.
+"""
+import logging
+import os
+
+from .python_classes import PythonTask
+
+
+class PRTestBase(PythonTask):
+    """
+    PR Test Base Class runs a test on PRs
+    in whitelisted repos when the PR is opened
+    or synced.
+
+    This class defines the process_payload method
+    to perform a git clone of whitelisted repos
+    and check out the head commit of this PR.
+    (Calling the test() method is still TODO.)
+
+    This uses PythonTask as the base class
+    so we can avoid multiple inheritance.
+    """
+    def process_payload(self,payload):
+        """
+        Decide whether this payload is a PR we should test
+        (PR opened or synced, repo on the whitelist); if so,
+        clone the repo and check out the PR head commit.
+        """
+        if not self.is_pull_request(payload):
+            return
+
+        if not (self.is_pull_request_open(payload)
+                or self.is_pull_request_sync(payload)):
+            return
+
+        # Only test repos we were explicitly told about
+        # NOTE(review): assumes repo_whitelist holds full
+        # names (organization/reponame) - confirm
+        if self.get_full_repo_name(payload) not in self.repo_whitelist:
+            return
+
+        # git_clone() and git_checkout() take no arguments,
+        # so stash what they need on the instance
+        self.clone_url = self.get_clone_url(payload)
+        self.head_commit = self.get_pull_request_head(payload)
+        repo_name = self.get_short_repo_name(payload)
+
+        if not (self.clone_url and self.head_commit and repo_name):
+            err = "ERROR: PRTestBase: process_payload(): "
+            err += "Payload is missing the clone url, repo name, or head commit"
+            logging.error(err)
+            return
+
+        self.repo_dir = os.path.join(self.temp_dir,repo_name)
+
+        # run_cmd() returns True when something went wrong
+        if self.git_clone() or self.git_checkout():
+            return
+
+        # TODO: run the test and report the result
+        # test
+        # test_success()
+        # test_fail()
+
+
+    def _virtual(self,method):
+        """
+        Log and raise the "must be overridden" error
+        for the virtual method named method
+        """
+        err = "ERROR: PRTestBase: %s(): "%(method)
+        err += "This is a virtual method and must be overridden "
+        err += "by a child class."
+        logging.error(err)
+        raise NotImplementedError(err)
+
+
+    def test(self):
+        """
+        Virtual method: actually run the PR test
+        """
+        self._virtual('test')
+
+
+    def test_fail(self):
+        """
+        Virtual method: run this when the PR test fails
+        """
+        self._virtual('test_fail')
+
+
+    def test_success(self):
+        """
+        Virtual method: run this when the PR test succeeds
+        """
+        self._virtual('test_success')
+
+
+    def git_clone(self):
+        """
+        Clone the git repository into self.repo_dir
+        (We always use the temp dir, so no need
+        to pass CWD or other into the method)
+
+        Uses self.clone_url and self.repo_dir, which
+        process_payload() sets. Returns True on failure.
+        """
+        cmd = ['git','clone',self.clone_url,self.repo_dir]
+        return self.run_cmd(cmd,"git clone",self.temp_dir)
+
+
+    def git_checkout(self):
+        """
+        Check out the head commit of the pull request
+        (self.head_commit) in the clone at self.repo_dir.
+        Returns True on failure.
+
+        NOTE(review): a PR opened from a fork has a head
+        commit the base repo clone may not contain - confirm
+        whether a fetch of the PR ref is needed first.
+        """
+        cmd = ['git','checkout',self.head_commit]
+        return self.run_cmd(cmd,"git checkout",self.repo_dir)
diff --git a/hooks/python_classes.py b/hooks/python_classes.py
new file mode 100644
--- /dev/null
+++ b/hooks/python_classes.py
@@ -0,0 +1,126 @@
+"""
+Uncle Archie Python Task
+
+This base class defines methods
+for running a Python task with
+Uncle Archie, namely, setting up
+a virtual environment and wrapping
+calls to python tools (e.g., mkdocs
+and snakemake) to use the virtualenv
+versions.
+
+This functionality extends Github
+tasks, rather than vice-versa, b/c
+the logic is that we may want to run
+non-Python tests but still keep the
+functionality to get the head commit
+of a pull request, check if merging,
+mark commits as success, etc etc.
+
+Example: docker-compose tests.
+"""
+import logging
+import os
+import shutil
+import subprocess
+
+from .base_classes import GithubTask
+
+
+class PythonTask(GithubTask):
+    """
+    Base class for a Python test.
+    """
+    def __init__(self,**kwargs):
+        """
+        This performs the initialization procedure
+        common to all Uncle Archie tests that use
+        Python in their task.
+
+        kwargs:
+            vp_label    : What to call the virtual environment
+            vp_dir      : (cwd = curr. working dir) the location
+                          of the virtual environment
+
+        All other kwargs are handled by GithubTask.
+        """
+        super().__init__(**kwargs)
+        self.setup_virtualenv(**kwargs)
+
+
+    def __del__(self,**kwargs):
+        """
+        Destructor
+        """
+        # __init__ may have raised before the virtual
+        # environment existed; then there is nothing to do
+        if not hasattr(self,'vp_dir'):
+            return
+
+        msg = "PythonTask: __del__(): Tearing down virtual environment"
+        logging.debug(msg)
+        self.teardown_virtualenv(**kwargs)
+
+
+    def setup_virtualenv(self,**kwargs):
+        """
+        Set up a virtual environment.
+        Called by the constructor.
+
+        kwargs:
+            vp_label    : What to call the virtual environment
+                          (default: 'vp')
+            vp_dir      : (cwd = curr. working dir) the location
+                          of the virtual environment
+                          (default: the task's temp dir)
+
+        Raises:
+            Exception : the virtualenv command failed
+        """
+        # Get the name of the virtual environment
+        self.vp_label = kwargs.pop('vp_label','vp')
+
+        # Get the directory of the virtual environment
+        self.vp_dir = kwargs.pop('vp_dir',self.temp_dir)
+
+        msg = "PythonTask: setup_virtualenv(): Creating new virtual environment named \"%s\" "%(self.vp_label)
+        msg += "in location \"%s\""%(self.vp_dir)
+        logging.debug(msg)
+
+        # Create the virtual environment
+        result = subprocess.call(['virtualenv',self.vp_label],cwd=self.vp_dir)
+        if result != 0:
+            err = "ERROR: PythonTask: setup_virtualenv(): "
+            err += "virtualenv exited with status %d"%(result)
+            logging.error(err)
+            raise Exception(err)
+
+        msg = "PythonTask: setup_virtualenv(): Success!"
+        logging.debug(msg)
+
+
+    def teardown_virtualenv(self,**kwargs):
+        """
+        Tear down a virtual environment.
+        Called by the destructor.
+
+        Only the virtual environment itself (vp_dir/vp_label)
+        is removed, so a vp_dir that holds other files
+        (e.g. the current working directory) is left alone.
+
+        NOTE(review): the task's temp_dir itself is not
+        removed here - confirm where that cleanup belongs.
+
+        kwargs:
+            None
+        """
+        vp_path = os.path.join(self.vp_dir,self.vp_label)
+
+        msg = "PythonTask: teardown_virtualenv(): Removing virtual environment at \"%s\" "%(vp_path)
+        logging.debug(msg)
+
+        # Remove it ourselves, no logging needed
+        shutil.rmtree(vp_path,ignore_errors=True)
+
+        msg = "PythonTask: teardown_virtualenv(): Success!"
+        logging.debug(msg)
diff --git a/hooks/simple_classes.py b/hooks/simple_classes.py
new file mode 100644
--- /dev/null
+++ b/hooks/simple_classes.py
@@ -0,0 +1,29 @@
+"""
+Uncle Archie: Simple Task Classes
+"""
+import logging
+
+from .base_classes import GithubTask
+
+
+class DumpPayloadTask(GithubTask):
+    """
+    The simplest possible Github task:
+    this task just dumps out the payload
+    received from Github to a file.
+
+    This doesn't use any features like
+    htdocs hosting of the output file
+    or assembling a permanent link.
+    """
+    def process_payload(self,payload):
+        """
+        Just save every webhook payload
+        """
+        # This is going to be called by every function
+        # If you want to filter when you dump the payload,
+        # do it here.
+        msg = "DumpPayloadTask: process_payload(): Saving webhook payload"
+        logging.debug(msg)
+
+        self.save_payload(payload)