beam-github mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [beam] fernando-wizeline commented on a change in pull request #15039: [BEAM-10913] - Post-commit github actions status
Date Thu, 01 Jul 2021 17:47:50 GMT

fernando-wizeline commented on a change in pull request #15039:
URL: https://github.com/apache/beam/pull/15039#discussion_r662483296



##########
File path: .test-infra/metrics/sync/github/sync_github_actions.py
##########
@@ -0,0 +1,142 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+'''
+This module queries GitHub to collect pre-commit GitHub Actions metrics and put them in
+PostgreSQL.
+'''
+import sys
+import time
+import psycopg2
+import datetime
+
+# Keeping this as reference for localhost debug
+# Fetching docker host machine ip for testing purposes.
+# Actual host should be used for production.
+def findDockerNetworkIP():
+  '''Utilizes ip tool to find docker network IP'''
+  import subprocess
+  cmd_out = subprocess.check_output(["ip", "route", "show"]).decode("utf-8")
+  return cmd_out.split(" ")[2]
+
+
+#DB_HOST = findDockerNetworkIP()
+
+GH_PRS_TABLE_NAME = 'gh_actions_metrics'
+
+GH_PRS_CREATE_TABLE_QUERY = f"""
+  create table {GH_PRS_TABLE_NAME} (
+  ga_id serial NOT NULL PRIMARY KEY,
+  job_name varchar NOT NULL,
+  status varchar NOT NULL,
+  workflow_id varchar NOT NULL,
+  workflow_url varchar NOT NULL,
+  executed_ts timestamp NOT NULL
+  )
+  """
+
+def initDBConnection():
+  '''Opens connection to postgresql DB, as configured via global variables.'''
+
+  DB_HOST = sys.argv[4]
+  DB_PORT = sys.argv[5]
+  DB_NAME = sys.argv[6]
+  DB_USER_NAME = sys.argv[7]
+  DB_PASSWORD = sys.argv[8]
+
+  conn = None
+  while not conn:
+    try:
+      conn = psycopg2.connect(
+          f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
+          f" port='{DB_PORT}' password='{DB_PASSWORD}'")
+    except:
+      print('Failed to connect to DB; retrying in one hour')
+      sys.stdout.flush()
+      time.sleep(3600)
+  return conn
+
+def tableExists(cursor, tableName):
+  '''Checks the existense of table.'''
+  cursor.execute(f"select * from information_schema.tables"
+                 f" where table_name='{tableName}';")
+  return bool(cursor.rowcount)
+
+def initDbTablesIfNeeded():
+  '''Creates and initializes DB tables required for script to work.'''
+  connection = initDBConnection()
+  cursor = connection.cursor()
+
+  buildsTableExists = tableExists(cursor, GH_PRS_TABLE_NAME)
+  print('PRs table exists', buildsTableExists)
+  if not buildsTableExists:
+    cursor.execute(GH_PRS_CREATE_TABLE_QUERY)
+    if not bool(cursor.rowcount):
+      raise Exception(f"Failed to create table {GH_PRS_TABLE_NAME}")
+  cursor.close()
+  connection.commit()
+
+  connection.close()
+
+def insertIntoTable(cursor, values):
+  insertRowQuery = f'''INSERT INTO {GH_PRS_TABLE_NAME}
+                            (job_name,
+                            status,
+                            workflow_id,
+                            workflow_url,
+                            executed_ts)
+                          VALUES
+                            (%s, %s, %s, %s, %s)
+                          '''
+  cursor.execute(insertRowQuery, values)
+
+def fetchNewData():
+  '''
+  Main workhorse method. Fetches data from GitHub and puts it in metrics table.
+  '''
+
+  job_name = sys.argv[1]
+  status = sys.argv[2]
+  workflow_id = sys.argv[3]
+  workflow_url = f'''https://github.com/fernando-wizeline/beam/actions/runs/{workflow_id}'''
#TODO: remove reference to personal repository
+  executed_ts = datetime.datetime.now()
+  row_values = [job_name, status, workflow_id, workflow_url, executed_ts]

Review comment:
       > is this method fetching any data from github? I think it is not , right?
   
   The method itself it is not pulling the data, but rather upserting it after the data was
feed to it.
   I'll update the comment and name of the method.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



Mime
View raw message