From d48784e646677d4cdf44ecf4aa08a9970bd7ca0b Mon Sep 17 00:00:00 2001 From: Gregory Mierzwinski Date: Fri, 8 May 2020 02:48:34 -0400 Subject: [PATCH] Bug 1627027 - Use nightly Fenix variant. (#10265) * Use nightly Fenix variant. * Run the tests in PR. * Update visual-metrics scripts to include the similarity metrics. * Use python3.5 in visual-metrics docker. * Install wget in the docker. * Use python3.6 hashes instead of python3.5. * Undo run-visual-metrics.py python changes. * Upgrade python setuptools version to 46.1.3. * Add setuptools to transitive dependency list. * Undo PR test changes. * Remove setuptools install line and use requirements.txt instead. * Undo PR test changes. * Fix geckodriver artifact suffix. * Test a browsertime task. * Revert browsertime test. --- taskcluster/ci/browsertime/kind.yml | 4 +- taskcluster/ci/raptor/kind.yml | 4 +- taskcluster/ci/toolchain/gecko-derived.yml | 2 +- taskcluster/docker/visual-metrics/Dockerfile | 2 +- .../docker/visual-metrics/requirements.txt | 12 +- .../visual-metrics/run-visual-metrics.py | 48 +++- .../docker/visual-metrics/similarity.py | 251 ++++++++++++++++++ .../transforms/visual_metrics.py | 4 + 8 files changed, 316 insertions(+), 11 deletions(-) create mode 100644 taskcluster/docker/visual-metrics/similarity.py diff --git a/taskcluster/ci/browsertime/kind.yml b/taskcluster/ci/browsertime/kind.yml index 011764a63..361b00d45 100644 --- a/taskcluster/ci/browsertime/kind.yml +++ b/taskcluster/ci/browsertime/kind.yml @@ -13,7 +13,7 @@ kind-dependencies: primary-dependency: signing only-for-build-types: - - performance-test + - nightly only-for-abis: - armeabi-v7a @@ -81,7 +81,7 @@ job-defaults: - '--app=fenix' - '--browsertime' - '--cold' - - '--binary=org.mozilla.fenix.performancetest' + - '--binary=org.mozilla.fenix.nightly' - '--activity=org.mozilla.fenix.IntentReceiverActivity' - '--download-symbols=ondemand' - '--browsertime-node=$MOZ_FETCHES_DIR/node/bin/node' diff --git a/taskcluster/ci/raptor/kind.yml b/taskcluster/ci/raptor/kind.yml index 607a08c3f..000658188 100644 --- a/taskcluster/ci/raptor/kind.yml +++ b/taskcluster/ci/raptor/kind.yml @@ -11,7 +11,7 @@ kind-dependencies: - toolchain only-for-build-types: - - performance-test + - nightly only-for-abis: - armeabi-v7a @@ -76,7 +76,7 @@ job-defaults: - './test-linux.sh' - '--cfg=mozharness/configs/raptor/android_hw_config.py' - '--app=fenix' - - '--binary=org.mozilla.fenix.performancetest' + - '--binary=org.mozilla.fenix.nightly' - '--activity=org.mozilla.fenix.IntentReceiverActivity' - '--download-symbols=ondemand' fetches: diff --git a/taskcluster/ci/toolchain/gecko-derived.yml b/taskcluster/ci/toolchain/gecko-derived.yml index 5595d6629..da1dccfaf 100644 --- a/taskcluster/ci/toolchain/gecko-derived.yml +++ b/taskcluster/ci/toolchain/gecko-derived.yml @@ -27,7 +27,7 @@ linux64-ffmpeg-4.1.4: linux64-geckodriver: attributes: - toolchain-artifact: public/build/geckodriver.tar.xz + toolchain-artifact: public/build/geckodriver.tar.gz description: "Geckodriver toolchain" run: index-search: diff --git a/taskcluster/docker/visual-metrics/Dockerfile b/taskcluster/docker/visual-metrics/Dockerfile index 742aed114..a96636252 100644 --- a/taskcluster/docker/visual-metrics/Dockerfile +++ b/taskcluster/docker/visual-metrics/Dockerfile @@ -20,10 +20,10 @@ WORKDIR /builds/worker USER worker:worker COPY requirements.txt /builds/worker/requirements.txt -RUN pip3 install setuptools==46.0.0 RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \ rm 
/builds/worker/requirements.txt +COPY similarity.py /builds/worker/bin/similarity.py COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py COPY performance-artifact-schema.json /builds/worker/performance-artifact-schema.json diff --git a/taskcluster/docker/visual-metrics/requirements.txt b/taskcluster/docker/visual-metrics/requirements.txt index 936f3a2f5..560a0d008 100644 --- a/taskcluster/docker/visual-metrics/requirements.txt +++ b/taskcluster/docker/visual-metrics/requirements.txt @@ -1,13 +1,23 @@ +# Dependency hashes must be for python3.6 + # Direct dependencies attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79 structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536 voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1 jsonschema==3.2.0 --hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163 +numpy==1.18.3 --hash=sha256:a551d8cc267c634774830086da42e4ba157fa41dd3b93982bc9501b284b0c689 +scipy==1.4.1 --hash=sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa +matplotlib==3.0.3 --hash=sha256:e8d1939262aa6b36d0c51f50a50a43a04b9618d20db31e6c0192b1463067aeef +opencv-python==4.2.0.34 --hash=sha256:dcb8da8c5ebaa6360c8555547a4c7beb6cd983dd95ba895bb78b86cc8cf3de2b # Transitive dependencies importlib_metadata==1.1.0 --hash=sha256:e6ac600a142cf2db707b1998382cc7fc3b02befb7273876e01b8ad10b9652742 more_itertools==8.0.0 --hash=sha256:a0ea684c39bc4315ba7aae406596ef191fd84f873d2d2751f84d64e81a7a2d45 pyrsistent==0.15.6 --hash=sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b -setuptools==46.0.0 --hash=sha256:693e0504490ed8420522bf6bc3aa4b0da6a9f1c80c68acfb4e959275fd04cd82 six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c zipp==0.6.0 --hash=sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335 +cycler==0.10.0 --hash=sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d +kiwisolver==1.1.0 --hash=sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c +pyparsing==2.4.7 --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b +python-dateutil==2.8.1 --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a +setuptools==46.1.3 --hash=sha256:4fe404eec2738c20ab5841fa2d791902d2a645f32318a7850ef26f8d7215a8ee diff --git a/taskcluster/docker/visual-metrics/run-visual-metrics.py b/taskcluster/docker/visual-metrics/run-visual-metrics.py index 69f1c32d1..4ae05172d 100644 --- a/taskcluster/docker/visual-metrics/run-visual-metrics.py +++ b/taskcluster/docker/visual-metrics/run-visual-metrics.py @@ -47,6 +47,7 @@ JOB_SCHEMA = Schema( {Required("test_name"): str, Required("browsertime_json_path"): str} ], Required("application"): {Required("name"): str, "version": str}, + Required("extra_options"): [str], } ) @@ -154,13 +155,13 @@ def read_json(json_path, schema): The contents of the file at ``json_path`` interpreted as JSON. 
""" try: - with open(str(json_path), "r") as f: + with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f: data = json.load(f) except Exception: log.error("Could not read JSON file", path=json_path, exc_info=True) raise - log.info("Loaded JSON from file", path=json_path, read_json=data) + log.info("Loaded JSON from file", path=json_path) try: schema(data) @@ -202,9 +203,9 @@ def main(log, args): tar.extractall(path=str(fetch_dir)) except Exception: log.error( - "Could not read extract browsertime results archive", + "Could not read/extract browsertime results archive", path=browsertime_results_path, - exc_info=True, + exc_info=True ) return 1 log.info("Extracted browsertime results", path=browsertime_results_path) @@ -213,6 +214,11 @@ def main(log, args): jobs_json_path = fetch_dir / "browsertime-results" / "jobs.json" jobs_json = read_json(jobs_json_path, JOB_SCHEMA) except Exception: + log.error( + "Could not open the jobs.json file", + path=jobs_json_path, + exc_info=True + ) return 1 jobs = [] @@ -223,6 +229,11 @@ def main(log, args): try: browsertime_json = read_json(browsertime_json_path, BROWSERTIME_SCHEMA) except Exception: + log.error( + "Could not open a browsertime.json file", + path=browsertime_json_path, + exc_info=True + ) return 1 for site in browsertime_json: @@ -272,6 +283,35 @@ def main(log, args): "type": "vismet", "suites": suites, } + for entry in suites: + entry["extraOptions"] = jobs_json["extra_options"] + + # Try to get the similarity for all possible tests, this means that we + # will also get a comparison of recorded vs. live sites to check + # the on-going quality of our recordings. + similarity = None + if "android" in os.getenv("TC_PLATFORM", ""): + try: + from similarity import calculate_similarity + similarity = calculate_similarity(jobs_json, fetch_dir, OUTPUT_DIR, log) + except Exception: + log.info("Failed to calculate similarity score", exc_info=True) + + if similarity: + suites[0]["subtests"].append({ + "name": "Similarity3D", + "value": similarity[0], + "replicates": [similarity[0]], + "lowerIsBetter": False, + "unit": "a.u.", + }) + suites[0]["subtests"].append({ + "name": "Similarity2D", + "value": similarity[1], + "replicates": [similarity[1]], + "lowerIsBetter": False, + "unit": "a.u.", + }) # Validates the perf data complies with perfherder schema. # The perfherder schema uses jsonschema so we can't use voluptuous here. diff --git a/taskcluster/docker/visual-metrics/similarity.py b/taskcluster/docker/visual-metrics/similarity.py new file mode 100644 index 000000000..5820e531e --- /dev/null +++ b/taskcluster/docker/visual-metrics/similarity.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import cv2
+import json
+import numpy as np
+import os
+import pathlib
+import shutil
+import socket
+import tarfile
+import tempfile
+import urllib.request
+
+from functools import wraps
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr
+
+
+def open_data(file):
+    return cv2.VideoCapture(str(file))
+
+
+def socket_timeout(value=120):
+    """Decorator for socket timeouts."""
+    def _socket_timeout(func):
+        @wraps(func)
+        def __socket_timeout(*args, **kw):
+            old = socket.getdefaulttimeout()
+            socket.setdefaulttimeout(value)
+            try:
+                return func(*args, **kw)
+            finally:
+                socket.setdefaulttimeout(old)
+        return __socket_timeout
+    return _socket_timeout
+
+
+@socket_timeout(120)
+def query_activedata(query_json, log):
+    """Used to run queries on ActiveData."""
+    active_data_url = "http://activedata.allizom.org/query"
+
+    req = urllib.request.Request(active_data_url)
+    req.add_header("Content-Type", "application/json")
+    jsondata = json.dumps(query_json)
+
+    jsondataasbytes = jsondata.encode("utf-8")
+    req.add_header("Content-Length", len(jsondataasbytes))
+
+    log.info("Querying Active-data...")
+    response = urllib.request.urlopen(req, jsondataasbytes)
+    log.info("Status: %s" % str(response.getcode()))
+
+    data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
+    return data
+
+
+@socket_timeout(120)
+def download(url, loc, log):
+    """Downloads from a url (with a timeout)."""
+    log.info("Downloading %s" % url)
+    try:
+        urllib.request.urlretrieve(url, loc)
+    except Exception as e:
+        log.info(str(e))
+        return False
+    return True
+
+
+def get_frames(video):
+    """Gets all frames from a video into a list."""
+    allframes = []
+    while video.isOpened():
+        ret, frame = video.read()
+        if ret:
+            # Convert to gray to simplify the process
+            allframes.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
+        else:
+            video.release()
+            break
+    return allframes
+
+
+def calculate_similarity(jobs_json, fetch_dir, output, log):
+    """Calculates the similarity score against the last live site test.
+
+    The technique works as follows:
+        1. Get the last live site test.
+        2. For each of the 15x15 video pairings, build a cross-correlation matrix:
+            1. Get each of the videos and calculate their histograms
+               across the full videos.
+            2. Calculate the correlation coefficient between these two.
+        3. Average the cross-correlation matrix to obtain the score.
+
+    The 2D similarity score is the same, except that it builds a histogram
+    from the final frame instead of the full video.
+
+    For finding the last live site, we use ActiveData. We search for
+    PGO android builds since this metric is only available for live sites that
+    run on android in mozilla-central. Given that live sites currently
+    run on cron 3 days a week, it's also reasonable to look for tasks
+    which have occurred before today and within the last two weeks at most.
+    But this is a TODO for future work, since we need to determine a better
+    way of selecting the last task (HG push logs?) - there's a lot that factors
+    into these choices, so it might require a multi-faceted approach.
+
+    Args:
+        jobs_json: The jobs JSON that holds extra information.
+        fetch_dir: The fetch directory that holds the new videos.
+        output: The directory where the worst-matching video pair is saved.
+        log: The logger.
+    Returns:
+        Two similarity scores (3D, 2D) as floats, or None if there was an issue.
+    """
+    app = jobs_json["application"]["name"]
+    test = jobs_json["jobs"][0]["test_name"]
+    splittest = test.split("-cold")
+
+    cold = ""
+    if len(splittest) > 1:  # split() found "-cold" in the test name
+        cold = ".*cold"
+    test = splittest[0]
+
+    # PGO vs.
OPT shouldn't matter much, but we restrict it to PGO builds here + # for android, and desktop tests have the opt/pgo restriction removed + plat = os.getenv("TC_PLATFORM", "") + if "android" in plat: + plat = plat.replace("/opt", "/pgo") + else: + plat = plat.replace("/opt", "").replace("/pgo", "") + ad_query = { + "from": "task", + "limit": 1000, + "where": { + "and": [ + { + "regexp": { + "run.name": ".*%s.*browsertime.*-live.*%s%s.*%s.*" + % (plat, app, cold, test) + } + }, + {"not": {"prefix": {"run.name": "test-vismet"}}}, + {"in": {"repo.branch.name": ["mozilla-central"]}}, + {"gte": {"action.start_time": {"date": "today-week-week"}}}, + {"lt": {"action.start_time": {"date": "today-1day"}}}, + {"in": {"task.run.state": ["completed"]}}, + ] + }, + "select": ["action.start_time", "run.name", "task.artifacts"], + } + + # Run the AD query and find the browsertime videos to download + failed = False + try: + data = query_activedata(ad_query, log) + except Exception as e: + log.info(str(e)) + failed = True + if failed or not data: + log.info("Couldn't get activedata data") + return None + + log.info("Found %s datums" % str(len(data["action.start_time"]))) + maxind = np.argmax([float(t) for t in data["action.start_time"]]) + artifacts = data["task.artifacts"][maxind] + btime_artifact = None + for art in artifacts: + if "browsertime-results" in art["name"]: + btime_artifact = art["url"] + break + if not btime_artifact: + log.info("Can't find an older live site") + return None + + # Download the browsertime videos and untar them + tmpdir = tempfile.mkdtemp() + loc = os.path.join(tmpdir, "tmpfile.tgz") + if not download(btime_artifact, loc, log): + return None + tmploc = tempfile.mkdtemp() + try: + with tarfile.open(str(loc)) as tar: + tar.extractall(path=tmploc) + except Exception: + log.info( + "Could not read/extract old browsertime results archive", + path=loc, + exc_info=True, + ) + return None + + # Find all the videos + oldmp4s = [str(f) for f in pathlib.Path(tmploc).rglob("*.mp4")] + log.info("Found %s old videos" % str(len(oldmp4s))) + newmp4s = [str(f) for f in pathlib.Path(fetch_dir).rglob("*.mp4")] + log.info("Found %s new videos" % str(len(newmp4s))) + + # Finally, calculate the 2D/3D score + nhists = [] + nhists2d = [] + + total_vids = min(len(oldmp4s), len(newmp4s)) + xcorr = np.zeros((total_vids, total_vids)) + xcorr2d = np.zeros((total_vids, total_vids)) + + for i in range(total_vids): + datao = np.asarray(get_frames(open_data(oldmp4s[i]))) + + histo, _, _ = plt.hist(datao.flatten(), bins=255) + histo2d, _, _ = plt.hist(datao[-1, :, :].flatten(), bins=255) + + for j in range(total_vids): + if i == 0: + # Only calculate the histograms once; it takes time + datan = np.asarray(get_frames(open_data(newmp4s[j]))) + + histn, _, _ = plt.hist(datan.flatten(), bins=255) + histn2d, _, _ = plt.hist(datan[-1, :, :].flatten(), bins=255) + + nhists.append(histn) + nhists2d.append(histn2d) + else: + histn = nhists[j] + histn2d = nhists2d[j] + + rho, _ = spearmanr(histn, histo) + rho2d, _ = spearmanr(histn2d, histo2d) + + xcorr[i, j] = rho + xcorr2d[i, j] = rho2d + + similarity = np.mean(xcorr) + similarity2d = np.mean(xcorr2d) + + log.info("Average 3D similarity: %s" % str(np.round(similarity, 5))) + log.info("Average 2D similarity: %s" % str(np.round(similarity2d, 5))) + + if similarity < 0.5: + # For really low correlations, output the worst video pairing + # so that we can visually see what the issue was + minind = np.unravel_index(np.argmin(xcorr, axis=None), xcorr.shape) + + oldvid = 
oldmp4s[minind[0]] + shutil.copyfile(oldvid, str(pathlib.Path(output, "old_video.mp4"))) + + newvid = newmp4s[minind[1]] + shutil.copyfile(newvid, str(pathlib.Path(output, "new_video.mp4"))) + + return np.round(similarity, 5), np.round(similarity2d, 5) diff --git a/taskcluster/fenix_taskgraph/transforms/visual_metrics.py b/taskcluster/fenix_taskgraph/transforms/visual_metrics.py index 7b65ba2a9..6a2a7950d 100644 --- a/taskcluster/fenix_taskgraph/transforms/visual_metrics.py +++ b/taskcluster/fenix_taskgraph/transforms/visual_metrics.py @@ -75,6 +75,10 @@ def run_visual_metrics(config, jobs): symbol=treeherder_info['symbol'] ) + # Store the platform name so we can use it to calculate + # the similarity metric against other tasks + job['worker'].setdefault('env', {})['TC_PLATFORM'] = platform + # run-on-projects needs to be set based on the dependent task attributes = dict(dep_job.attributes) job['run-on-projects'] = attributes['run_on_projects']
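Reviewer note: the core of the new metric is the histogram cross-correlation
step in similarity.py. Each video is reduced to a 255-bin luminance histogram
(the whole frame stack for the 3D score, only the final frame for the 2D
score), every old/new video pairing is scored with a Spearman rank
correlation, and the resulting matrix is averaged. A minimal sketch of that
step, with np.histogram standing in for the patch's plt.hist (both return the
same bin counts) and old_videos/new_videos as hypothetical lists of grayscale
frame stacks:

    import numpy as np
    from scipy.stats import spearmanr

    def similarity_score(old_videos, new_videos):
        # One luminance histogram per video, as in calculate_similarity().
        n = min(len(old_videos), len(new_videos))
        old_hists = [np.histogram(v.flatten(), bins=255)[0] for v in old_videos[:n]]
        new_hists = [np.histogram(v.flatten(), bins=255)[0] for v in new_videos[:n]]

        # Cross-correlate every old/new pairing and average the matrix.
        xcorr = np.zeros((n, n))
        for i, histo in enumerate(old_hists):
            for j, histn in enumerate(new_hists):
                rho, _ = spearmanr(histn, histo)
                xcorr[i, j] = rho
        return np.round(np.mean(xcorr), 5)

A score near 1.0 means the new recording ranks pixel intensities the same way
the last live-site run did; the patch treats anything below 0.5 as suspicious
enough to copy the worst-matching video pair into the task's output directory.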
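A second note on the socket_timeout decorator: socket.setdefaulttimeout() is
process-global, so the decorator saves the old value and restores it in a
finally block, which keeps a hung ActiveData request from leaving the rest of
the vismet task with a changed timeout. A sketch of reusing it, where
fetch_last_task and the 30-second value are illustrative and not part of the
patch:

    import json
    import urllib.request

    from similarity import socket_timeout

    @socket_timeout(30)
    def fetch_last_task(url):
        # Every socket opened inside this call inherits the 30s default
        # timeout; the previous default is restored even if urlopen raises.
        with urllib.request.urlopen(url) as response:
            return json.loads(response.read().decode("utf-8"))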