Fetch test grid data

  • In this notebook, we fetch the relevant data from https://testgrid.k8s.io and save it to Ceph for further analysis.

  • This notebook is meant to be run in automation as part of a Kubeflow pipeline.

## Import libraries
import datetime
import os
import json
from dotenv import load_dotenv, find_dotenv
import requests
from bs4 import BeautifulSoup
from ipynb.fs.defs.metric_template import CephCommunication

# Load environment variables (e.g. the S3_* settings read with os.getenv
# further down) from the .env file located by find_dotenv().
load_dotenv(find_dotenv())
True
## Specify variables

# URL of the TestGrid page from which the dashboard names are scraped
INPUT_DATA_PATH = (
    "https://testgrid.k8s.io/"
    "redhat-openshift-informing?id=dashboard-group-bar"
)

# Local directory that receives the raw JSON when not running in automation
OUTPUT_DATA_PATH = "../../../../data/raw"

# Ceph (S3) bucket settings.
# Create a local .env file holding the correct values for these keys;
# any key that is not set in the environment resolves to None.
s3_endpoint_url = os.environ.get("S3_ENDPOINT")
s3_access_key = os.environ.get("S3_ACCESS_KEY")
s3_secret_key = os.environ.get("S3_SECRET_KEY")
s3_bucket = os.environ.get("S3_BUCKET")

# Key prefix inside the bucket under which the raw file is stored
s3_input_data_path = "raw_data"

# Notebook vs. automation-pipeline switch. This is the raw environment string
# (or None) and is later checked for truthiness, so any non-empty value of
# IN_AUTOMATION selects the automation path.
AUTOMATION = os.environ.get("IN_AUTOMATION")
## Connect to the url and fetch dashboard names
# Fetch the dashboard-group page. The timeout keeps an unresponsive server
# from hanging the run indefinitely, and raise_for_status() reports an HTTP
# error directly instead of letting it surface as an IndexError in the
# positional parsing below.
response = requests.get(INPUT_DATA_PATH, timeout=60)
response.raise_for_status()
html = BeautifulSoup(response.content)

# The dashboard list is taken from the page's fourth <script> tag: the sixth
# whitespace-separated token of its text, split on commas. This positional
# indexing is tied to the page layout and must be revisited if it changes.
testgrid_script = html.findAll("script")[3]
testgrid_script = testgrid_script.text.split()[5].split(",")

# For every fragment containing "name", keep the part after the first colon.
# The surrounding double quotes remain part of each resulting string.
dashboard_names = [x.split(":")[1] for x in testgrid_script if "name" in x]
dashboard_names
['"redhat-assisted-installer"',
 '"redhat-openshift-informing"',
 '"redhat-openshift-ocp-release-4.1-blocking"',
 '"redhat-openshift-ocp-release-4.1-informing"',
 '"redhat-openshift-ocp-release-4.2-blocking"',
 '"redhat-openshift-ocp-release-4.2-informing"',
 '"redhat-openshift-ocp-release-4.3-blocking"',
 '"redhat-openshift-ocp-release-4.3-broken"',
 '"redhat-openshift-ocp-release-4.3-informing"',
 '"redhat-openshift-ocp-release-4.4-blocking"',
 '"redhat-openshift-ocp-release-4.4-broken"',
 '"redhat-openshift-ocp-release-4.4-informing"',
 '"redhat-openshift-ocp-release-4.5-blocking"',
 '"redhat-openshift-ocp-release-4.5-broken"',
 '"redhat-openshift-ocp-release-4.5-informing"',
 '"redhat-openshift-ocp-release-4.6-blocking"',
 '"redhat-openshift-ocp-release-4.6-broken"',
 '"redhat-openshift-ocp-release-4.6-informing"',
 '"redhat-openshift-ocp-release-4.7-blocking"',
 '"redhat-openshift-ocp-release-4.7-broken"',
 '"redhat-openshift-ocp-release-4.7-informing"',
 '"redhat-openshift-ocp-release-4.8-blocking"',
 '"redhat-openshift-ocp-release-4.8-informing"',
 '"redhat-openshift-ocp-release-4.9-blocking"',
 '"redhat-openshift-ocp-release-4.9-informing"',
 '"redhat-openshift-okd-release-4.3-informing"',
 '"redhat-openshift-okd-release-4.4-informing"',
 '"redhat-openshift-okd-release-4.5-blocking"',
 '"redhat-openshift-okd-release-4.5-informing"',
 '"redhat-openshift-okd-release-4.6-blocking"',
 '"redhat-openshift-okd-release-4.6-informing"',
 '"redhat-openshift-okd-release-4.7-blocking"',
 '"redhat-openshift-okd-release-4.7-informing"',
 '"redhat-openshift-okd-release-4.8-blocking"',
 '"redhat-openshift-okd-release-4.8-informing"',
 '"redhat-openshift-okd-release-4.9-informing"',
 '"redhat-openshift-presubmit-master-gcp"',
 '"redhat-osd"',
 '"redhat-single-node"']
## Download the dashboard data
# Toggle for the download step. NOTE: data_set is only defined when this is
# True, and the save step at the end of the notebook needs it.
download = True
if download:
    # Seconds to wait for the server before giving up on a request, so that a
    # stalled connection cannot hang an automated run indefinitely.
    request_timeout = 60

    # dashboard name -> {job name -> {"grid": [...], "timestamps": [...]}}
    data_set = {}

    for dashboard in dashboard_names:
        response_1 = requests.get(
            f"https://testgrid.k8s.io/{dashboard}/summary",
            timeout=request_timeout,
        )
        # The keys of the summary JSON are used as the job (tab) names.
        jobs = response_1.json().keys()
        dashboard_jobs = {}

        for job in jobs:
            response_2 = requests.get(
                f"https://testgrid.k8s.io/{dashboard}/table?&show-stale-tests=&tab={job}&graph-metrics=test-duration-minutes",  # noqa
                timeout=request_timeout,
            )
            # Jobs whose table cannot be fetched are skipped.
            if response_2.status_code != 200:
                continue

            # Decode the body once instead of re-parsing it for every access.
            table = response_2.json()
            if "tests" not in table:
                continue

            # Keep only the fields needed downstream; "graphs" is optional
            # and stored as None when a test does not provide it.
            grid = [
                {
                    "name": x["name"],
                    "statuses": x["statuses"],
                    "graphs": x.get("graphs"),
                }
                for x in table["tests"]
            ]
            time_stamps = table["timestamps"]

            dashboard_jobs[job] = {"grid": grid, "timestamps": time_stamps}

        data_set[dashboard] = dashboard_jobs
        print(f"{dashboard} downloaded ")
else:
    print("Not Downloading")
"redhat-assisted-installer" downloaded 
"redhat-openshift-informing" downloaded 
"redhat-openshift-ocp-release-4.1-blocking" downloaded 
"redhat-openshift-ocp-release-4.1-informing" downloaded 
"redhat-openshift-ocp-release-4.2-blocking" downloaded 
"redhat-openshift-ocp-release-4.2-informing" downloaded 
"redhat-openshift-ocp-release-4.3-blocking" downloaded 
"redhat-openshift-ocp-release-4.3-broken" downloaded 
"redhat-openshift-ocp-release-4.3-informing" downloaded 
"redhat-openshift-ocp-release-4.4-blocking" downloaded 
"redhat-openshift-ocp-release-4.4-broken" downloaded 
"redhat-openshift-ocp-release-4.4-informing" downloaded 
"redhat-openshift-ocp-release-4.5-blocking" downloaded 
"redhat-openshift-ocp-release-4.5-broken" downloaded 
"redhat-openshift-ocp-release-4.5-informing" downloaded 
"redhat-openshift-ocp-release-4.6-blocking" downloaded 
"redhat-openshift-ocp-release-4.6-broken" downloaded 
"redhat-openshift-ocp-release-4.6-informing" downloaded 
"redhat-openshift-ocp-release-4.7-blocking" downloaded 
"redhat-openshift-ocp-release-4.7-broken" downloaded 
"redhat-openshift-ocp-release-4.7-informing" downloaded 
"redhat-openshift-ocp-release-4.8-blocking" downloaded 
"redhat-openshift-ocp-release-4.8-informing" downloaded 
"redhat-openshift-ocp-release-4.9-blocking" downloaded 
"redhat-openshift-ocp-release-4.9-informing" downloaded 
"redhat-openshift-okd-release-4.3-informing" downloaded 
"redhat-openshift-okd-release-4.4-informing" downloaded 
"redhat-openshift-okd-release-4.5-blocking" downloaded 
"redhat-openshift-okd-release-4.5-informing" downloaded 
"redhat-openshift-okd-release-4.6-blocking" downloaded 
"redhat-openshift-okd-release-4.6-informing" downloaded 
"redhat-openshift-okd-release-4.7-blocking" downloaded 
"redhat-openshift-okd-release-4.7-informing" downloaded 
"redhat-openshift-okd-release-4.8-blocking" downloaded 
"redhat-openshift-okd-release-4.8-informing" downloaded 
"redhat-openshift-okd-release-4.9-informing" downloaded 
"redhat-openshift-presubmit-master-gcp" downloaded 
"redhat-osd" downloaded 
"redhat-single-node" downloaded 
## Set filename
# Name the output file after today's day and month.
# NOTE(review): the numbers are not zero-padded and the year is omitted, so
# distinct dates can map to the same name (1 Nov and 11 Jan both give
# "testgrid_111.json"). Left unchanged because other consumers may look the
# file up by this exact pattern; confirm before changing it.
date = datetime.datetime.today()
filename = f"testgrid_{date.day}{date.month}.json"
timestamp = datetime.datetime.now()  # not referenced in the visible part of this notebook

if AUTOMATION:
    ## Connect to Ceph
    cc = CephCommunication(s3_endpoint_url, s3_access_key, s3_secret_key, s3_bucket)

    ## Put data on ceph
    # str.encode() already returns bytes, so no extra bytes() wrapper is needed.
    s3_obj = cc.s3_resource.Object(s3_bucket, f"{s3_input_data_path}/{filename}")
    status = s3_obj.put(Body=json.dumps(data_set).encode("UTF-8"))

    ## Print Status
    print(status)

else:
    # Create the output directory if it is missing so that a fresh checkout
    # does not fail with FileNotFoundError; a no-op when it already exists.
    os.makedirs(OUTPUT_DATA_PATH, exist_ok=True)
    file_path = f"{OUTPUT_DATA_PATH}/{filename}"
    with open(file_path, "w") as outfile:
        json.dump(data_set, outfile)
{'ResponseMetadata': {'RequestId': 'knht0bi2-f6z6ty-115m', 'HostId': 'knht0bi2-f6z6ty-115m', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-request-id': 'knht0bi2-f6z6ty-115m', 'x-amz-id-2': 'knht0bi2-f6z6ty-115m', 'access-control-allow-origin': '*', 'access-control-allow-credentials': 'true', 'access-control-allow-methods': 'GET,POST,PUT,DELETE,OPTIONS', 'access-control-allow-headers': 'Content-Type,Content-MD5,Authorization,X-Amz-User-Agent,X-Amz-Date,ETag,X-Amz-Content-Sha256', 'access-control-expose-headers': 'ETag,X-Amz-Version-Id', 'etag': '"7b7ff1bfdfe25e30abe9f490e53180c0"', 'date': 'Wed, 14 Apr 2021 18:45:34 GMT', 'connection': 'keep-alive', 'content-length': '0'}, 'RetryAttempts': 0}, 'ETag': '"7b7ff1bfdfe25e30abe9f490e53180c0"'}