From d48784e646677d4cdf44ecf4aa08a9970bd7ca0b Mon Sep 17 00:00:00 2001 From: Gregory Mierzwinski Date: Fri, 8 May 2020 02:48:34 -0400 Subject: [PATCH] Bug 1627027 - Use nightly Fenix variant. (#10265) * Use nightly Fenix variant. * Run the tests in PR. * Update visual-metrics scripts to include the similarity metrics. * Use python3.5 in visual-metrics docker. * Install wget in the docker. * Use python3.6 hashes instead of python3.5. * Undo run-visual-metrics.py python changes. * Upgrade python setuptools version to 46.1.3. * Add setuptools to transitive dependency list. * Undo PR test changes. * Remove setuptools install line and use requirements.txt instead. * Undo PR test changes. * Fix geckodriver artifact suffix. * Test a browsertime task. * Revert browsertime test. --- taskcluster/ci/browsertime/kind.yml | 4 +- taskcluster/ci/raptor/kind.yml | 4 +- taskcluster/ci/toolchain/gecko-derived.yml | 2 +- taskcluster/docker/visual-metrics/Dockerfile | 2 +- .../docker/visual-metrics/requirements.txt | 12 +- .../visual-metrics/run-visual-metrics.py | 48 +++- .../docker/visual-metrics/similarity.py | 251 ++++++++++++++++++ .../transforms/visual_metrics.py | 4 + 8 files changed, 316 insertions(+), 11 deletions(-) create mode 100644 taskcluster/docker/visual-metrics/similarity.py diff --git a/taskcluster/ci/browsertime/kind.yml b/taskcluster/ci/browsertime/kind.yml index 011764a63..361b00d45 100644 --- a/taskcluster/ci/browsertime/kind.yml +++ b/taskcluster/ci/browsertime/kind.yml @@ -13,7 +13,7 @@ kind-dependencies: primary-dependency: signing only-for-build-types: - - performance-test + - nightly only-for-abis: - armeabi-v7a @@ -81,7 +81,7 @@ job-defaults: - '--app=fenix' - '--browsertime' - '--cold' - - '--binary=org.mozilla.fenix.performancetest' + - '--binary=org.mozilla.fenix.nightly' - '--activity=org.mozilla.fenix.IntentReceiverActivity' - '--download-symbols=ondemand' - '--browsertime-node=$MOZ_FETCHES_DIR/node/bin/node' diff --git a/taskcluster/ci/raptor/kind.yml b/taskcluster/ci/raptor/kind.yml index 607a08c3f..000658188 100644 --- a/taskcluster/ci/raptor/kind.yml +++ b/taskcluster/ci/raptor/kind.yml @@ -11,7 +11,7 @@ kind-dependencies: - toolchain only-for-build-types: - - performance-test + - nightly only-for-abis: - armeabi-v7a @@ -76,7 +76,7 @@ job-defaults: - './test-linux.sh' - '--cfg=mozharness/configs/raptor/android_hw_config.py' - '--app=fenix' - - '--binary=org.mozilla.fenix.performancetest' + - '--binary=org.mozilla.fenix.nightly' - '--activity=org.mozilla.fenix.IntentReceiverActivity' - '--download-symbols=ondemand' fetches: diff --git a/taskcluster/ci/toolchain/gecko-derived.yml b/taskcluster/ci/toolchain/gecko-derived.yml index 5595d6629..da1dccfaf 100644 --- a/taskcluster/ci/toolchain/gecko-derived.yml +++ b/taskcluster/ci/toolchain/gecko-derived.yml @@ -27,7 +27,7 @@ linux64-ffmpeg-4.1.4: linux64-geckodriver: attributes: - toolchain-artifact: public/build/geckodriver.tar.xz + toolchain-artifact: public/build/geckodriver.tar.gz description: "Geckodriver toolchain" run: index-search: diff --git a/taskcluster/docker/visual-metrics/Dockerfile b/taskcluster/docker/visual-metrics/Dockerfile index 742aed114..a96636252 100644 --- a/taskcluster/docker/visual-metrics/Dockerfile +++ b/taskcluster/docker/visual-metrics/Dockerfile @@ -20,10 +20,10 @@ WORKDIR /builds/worker USER worker:worker COPY requirements.txt /builds/worker/requirements.txt -RUN pip3 install setuptools==46.0.0 RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \ rm 
/builds/worker/requirements.txt +COPY similarity.py /builds/worker/bin/similarity.py COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py COPY performance-artifact-schema.json /builds/worker/performance-artifact-schema.json diff --git a/taskcluster/docker/visual-metrics/requirements.txt b/taskcluster/docker/visual-metrics/requirements.txt index 936f3a2f5..560a0d008 100644 --- a/taskcluster/docker/visual-metrics/requirements.txt +++ b/taskcluster/docker/visual-metrics/requirements.txt @@ -1,13 +1,23 @@ +# Dependency hashes must be for python3.6 + # Direct dependencies attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79 structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536 voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1 jsonschema==3.2.0 --hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163 +numpy==1.18.3 --hash=sha256:a551d8cc267c634774830086da42e4ba157fa41dd3b93982bc9501b284b0c689 +scipy==1.4.1 --hash=sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa +matplotlib==3.0.3 --hash=sha256:e8d1939262aa6b36d0c51f50a50a43a04b9618d20db31e6c0192b1463067aeef +opencv-python==4.2.0.34 --hash=sha256:dcb8da8c5ebaa6360c8555547a4c7beb6cd983dd95ba895bb78b86cc8cf3de2b # Transitive dependencies importlib_metadata==1.1.0 --hash=sha256:e6ac600a142cf2db707b1998382cc7fc3b02befb7273876e01b8ad10b9652742 more_itertools==8.0.0 --hash=sha256:a0ea684c39bc4315ba7aae406596ef191fd84f873d2d2751f84d64e81a7a2d45 pyrsistent==0.15.6 --hash=sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b -setuptools==46.0.0 --hash=sha256:693e0504490ed8420522bf6bc3aa4b0da6a9f1c80c68acfb4e959275fd04cd82 six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c zipp==0.6.0 --hash=sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335 +cycler==0.10.0 --hash=sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d +kiwisolver==1.1.0 --hash=sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c +pyparsing==2.4.7 --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b +python-dateutil==2.8.1 --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a +setuptools==46.1.3 --hash=sha256:4fe404eec2738c20ab5841fa2d791902d2a645f32318a7850ef26f8d7215a8ee diff --git a/taskcluster/docker/visual-metrics/run-visual-metrics.py b/taskcluster/docker/visual-metrics/run-visual-metrics.py index 69f1c32d1..4ae05172d 100644 --- a/taskcluster/docker/visual-metrics/run-visual-metrics.py +++ b/taskcluster/docker/visual-metrics/run-visual-metrics.py @@ -47,6 +47,7 @@ JOB_SCHEMA = Schema( {Required("test_name"): str, Required("browsertime_json_path"): str} ], Required("application"): {Required("name"): str, "version": str}, + Required("extra_options"): [str], } ) @@ -154,13 +155,13 @@ def read_json(json_path, schema): The contents of the file at ``json_path`` interpreted as JSON. 
""" try: - with open(str(json_path), "r") as f: + with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f: data = json.load(f) except Exception: log.error("Could not read JSON file", path=json_path, exc_info=True) raise - log.info("Loaded JSON from file", path=json_path, read_json=data) + log.info("Loaded JSON from file", path=json_path) try: schema(data) @@ -202,9 +203,9 @@ def main(log, args): tar.extractall(path=str(fetch_dir)) except Exception: log.error( - "Could not read extract browsertime results archive", + "Could not read/extract browsertime results archive", path=browsertime_results_path, - exc_info=True, + exc_info=True ) return 1 log.info("Extracted browsertime results", path=browsertime_results_path) @@ -213,6 +214,11 @@ def main(log, args): jobs_json_path = fetch_dir / "browsertime-results" / "jobs.json" jobs_json = read_json(jobs_json_path, JOB_SCHEMA) except Exception: + log.error( + "Could not open the jobs.json file", + path=jobs_json_path, + exc_info=True + ) return 1 jobs = [] @@ -223,6 +229,11 @@ def main(log, args): try: browsertime_json = read_json(browsertime_json_path, BROWSERTIME_SCHEMA) except Exception: + log.error( + "Could not open a browsertime.json file", + path=browsertime_json_path, + exc_info=True + ) return 1 for site in browsertime_json: @@ -272,6 +283,35 @@ def main(log, args): "type": "vismet", "suites": suites, } + for entry in suites: + entry["extraOptions"] = jobs_json["extra_options"] + + # Try to get the similarity for all possible tests, this means that we + # will also get a comparison of recorded vs. live sites to check + # the on-going quality of our recordings. + similarity = None + if "android" in os.getenv("TC_PLATFORM", ""): + try: + from similarity import calculate_similarity + similarity = calculate_similarity(jobs_json, fetch_dir, OUTPUT_DIR, log) + except Exception: + log.info("Failed to calculate similarity score", exc_info=True) + + if similarity: + suites[0]["subtests"].append({ + "name": "Similarity3D", + "value": similarity[0], + "replicates": [similarity[0]], + "lowerIsBetter": False, + "unit": "a.u.", + }) + suites[0]["subtests"].append({ + "name": "Similarity2D", + "value": similarity[1], + "replicates": [similarity[1]], + "lowerIsBetter": False, + "unit": "a.u.", + }) # Validates the perf data complies with perfherder schema. # The perfherder schema uses jsonschema so we can't use voluptuous here. diff --git a/taskcluster/docker/visual-metrics/similarity.py b/taskcluster/docker/visual-metrics/similarity.py new file mode 100644 index 000000000..5820e531e --- /dev/null +++ b/taskcluster/docker/visual-metrics/similarity.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import cv2
+import json
+import numpy as np
+import os
+import pathlib
+import shutil
+import socket
+import tarfile
+import tempfile
+import urllib.request
+
+from functools import wraps
+from matplotlib import pyplot as plt
+from scipy.stats import spearmanr
+
+
+def open_data(file):
+    return cv2.VideoCapture(str(file))
+
+
+def socket_timeout(value=120):
+    """Decorator for socket timeouts."""
+    def _socket_timeout(func):
+        @wraps(func)
+        def __socket_timeout(*args, **kw):
+            old = socket.getdefaulttimeout()
+            socket.setdefaulttimeout(value)
+            try:
+                return func(*args, **kw)
+            finally:
+                socket.setdefaulttimeout(old)
+        return __socket_timeout
+    return _socket_timeout
+
+
+@socket_timeout(120)
+def query_activedata(query_json, log):
+    """Used to run queries on ActiveData."""
+    active_data_url = "http://activedata.allizom.org/query"
+
+    req = urllib.request.Request(active_data_url)
+    req.add_header("Content-Type", "application/json")
+    jsondata = json.dumps(query_json)
+
+    jsondataasbytes = jsondata.encode("utf-8")
+    req.add_header("Content-Length", len(jsondataasbytes))
+
+    log.info("Querying Active-data...")
+    response = urllib.request.urlopen(req, jsondataasbytes)
+    log.info("Status: %s" % str(response.getcode()))
+
+    data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
+    return data
+
+
+@socket_timeout(120)
+def download(url, loc, log):
+    """Downloads from a url (with a timeout)."""
+    log.info("Downloading %s" % url)
+    try:
+        urllib.request.urlretrieve(url, loc)
+    except Exception as e:
+        log.info(str(e))
+        return False
+    return True
+
+
+def get_frames(video):
+    """Gets all frames from a video into a list."""
+    allframes = []
+    while video.isOpened():
+        ret, frame = video.read()
+        if ret:
+            # Convert to gray to simplify the process
+            allframes.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
+        else:
+            video.release()
+            break
+    return allframes
+
+
+def calculate_similarity(jobs_json, fetch_dir, output, log):
+    """Calculates the similarity score against the last live site test.
+
+    The technique works as follows:
+        1. Get the last live site test.
+        2. For each of the 15x15 video pairings, build a cross-correlation matrix:
+            1. Get each of the videos and calculate their histograms
+               across the full videos.
+            2. Calculate the correlation coefficient between these two.
+        3. Average the cross-correlation matrix to obtain the score.
+
+    The 2D similarity score is the same, except that it builds a histogram
+    from the final frame instead of the full video.
+
+    For finding the last live site, we use ActiveData. We search for
+    PGO android builds since this metric is only available for live sites that
+    run on android in mozilla-central. Given that live sites currently
+    run on cron 3 days a week, it's also reasonable to look for tasks
+    which have occurred before today and within the last two weeks at most.
+    But this is a TODO for future work, since we need to determine a better
+    way of selecting the last task (HG push logs?) - there's a lot that factors
+    into these choices, so it might require a multi-faceted approach.
+
+    Args:
+        jobs_json: The jobs JSON that holds extra information.
+        fetch_dir: The fetch directory that holds the new videos.
+        output: The directory where the worst-matching video pair is saved.
+        log: The logger.
+    Returns:
+        Two similarity scores (3D, 2D) as floats, or None if there was an issue.
+    """
+    app = jobs_json["application"]["name"]
+    test = jobs_json["jobs"][0]["test_name"]
+    splittest = test.split("-cold")
+
+    cold = ""
+    if len(splittest) > 1:  # split() found "-cold" in the test name
+        cold = ".*cold"
+    test = splittest[0]
+
+    # PGO vs.
OPT shouldn't matter much, but we restrict it to PGO builds here + # for android, and desktop tests have the opt/pgo restriction removed + plat = os.getenv("TC_PLATFORM", "") + if "android" in plat: + plat = plat.replace("/opt", "/pgo") + else: + plat = plat.replace("/opt", "").replace("/pgo", "") + ad_query = { + "from": "task", + "limit": 1000, + "where": { + "and": [ + { + "regexp": { + "run.name": ".*%s.*browsertime.*-live.*%s%s.*%s.*" + % (plat, app, cold, test) + } + }, + {"not": {"prefix": {"run.name": "test-vismet"}}}, + {"in": {"repo.branch.name": ["mozilla-central"]}}, + {"gte": {"action.start_time": {"date": "today-week-week"}}}, + {"lt": {"action.start_time": {"date": "today-1day"}}}, + {"in": {"task.run.state": ["completed"]}}, + ] + }, + "select": ["action.start_time", "run.name", "task.artifacts"], + } + + # Run the AD query and find the browsertime videos to download + failed = False + try: + data = query_activedata(ad_query, log) + except Exception as e: + log.info(str(e)) + failed = True + if failed or not data: + log.info("Couldn't get activedata data") + return None + + log.info("Found %s datums" % str(len(data["action.start_time"]))) + maxind = np.argmax([float(t) for t in data["action.start_time"]]) + artifacts = data["task.artifacts"][maxind] + btime_artifact = None + for art in artifacts: + if "browsertime-results" in art["name"]: + btime_artifact = art["url"] + break + if not btime_artifact: + log.info("Can't find an older live site") + return None + + # Download the browsertime videos and untar them + tmpdir = tempfile.mkdtemp() + loc = os.path.join(tmpdir, "tmpfile.tgz") + if not download(btime_artifact, loc, log): + return None + tmploc = tempfile.mkdtemp() + try: + with tarfile.open(str(loc)) as tar: + tar.extractall(path=tmploc) + except Exception: + log.info( + "Could not read/extract old browsertime results archive", + path=loc, + exc_info=True, + ) + return None + + # Find all the videos + oldmp4s = [str(f) for f in pathlib.Path(tmploc).rglob("*.mp4")] + log.info("Found %s old videos" % str(len(oldmp4s))) + newmp4s = [str(f) for f in pathlib.Path(fetch_dir).rglob("*.mp4")] + log.info("Found %s new videos" % str(len(newmp4s))) + + # Finally, calculate the 2D/3D score + nhists = [] + nhists2d = [] + + total_vids = min(len(oldmp4s), len(newmp4s)) + xcorr = np.zeros((total_vids, total_vids)) + xcorr2d = np.zeros((total_vids, total_vids)) + + for i in range(total_vids): + datao = np.asarray(get_frames(open_data(oldmp4s[i]))) + + histo, _, _ = plt.hist(datao.flatten(), bins=255) + histo2d, _, _ = plt.hist(datao[-1, :, :].flatten(), bins=255) + + for j in range(total_vids): + if i == 0: + # Only calculate the histograms once; it takes time + datan = np.asarray(get_frames(open_data(newmp4s[j]))) + + histn, _, _ = plt.hist(datan.flatten(), bins=255) + histn2d, _, _ = plt.hist(datan[-1, :, :].flatten(), bins=255) + + nhists.append(histn) + nhists2d.append(histn2d) + else: + histn = nhists[j] + histn2d = nhists2d[j] + + rho, _ = spearmanr(histn, histo) + rho2d, _ = spearmanr(histn2d, histo2d) + + xcorr[i, j] = rho + xcorr2d[i, j] = rho2d + + similarity = np.mean(xcorr) + similarity2d = np.mean(xcorr2d) + + log.info("Average 3D similarity: %s" % str(np.round(similarity, 5))) + log.info("Average 2D similarity: %s" % str(np.round(similarity2d, 5))) + + if similarity < 0.5: + # For really low correlations, output the worst video pairing + # so that we can visually see what the issue was + minind = np.unravel_index(np.argmin(xcorr, axis=None), xcorr.shape) + + oldvid = 
oldmp4s[minind[0]] + shutil.copyfile(oldvid, str(pathlib.Path(output, "old_video.mp4"))) + + newvid = newmp4s[minind[1]] + shutil.copyfile(newvid, str(pathlib.Path(output, "new_video.mp4"))) + + return np.round(similarity, 5), np.round(similarity2d, 5) diff --git a/taskcluster/fenix_taskgraph/transforms/visual_metrics.py b/taskcluster/fenix_taskgraph/transforms/visual_metrics.py index 7b65ba2a9..6a2a7950d 100644 --- a/taskcluster/fenix_taskgraph/transforms/visual_metrics.py +++ b/taskcluster/fenix_taskgraph/transforms/visual_metrics.py @@ -75,6 +75,10 @@ def run_visual_metrics(config, jobs): symbol=treeherder_info['symbol'] ) + # Store the platform name so we can use it to calculate + # the similarity metric against other tasks + job['worker'].setdefault('env', {})['TC_PLATFORM'] = platform + # run-on-projects needs to be set based on the dependent task attributes = dict(dep_job.attributes) job['run-on-projects'] = attributes['run_on_projects']
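Reviewer note: the core of the new metric is the histogram cross-correlation
step in similarity.py. Each video is reduced to a 255-bin luminance histogram
(the whole frame stack for the 3D score, only the final frame for the 2D
score), every old/new video pairing is scored with a Spearman rank
correlation, and the resulting matrix is averaged. A minimal sketch of that
step, with np.histogram standing in for the patch's plt.hist (both return the
same bin counts) and old_videos/new_videos as hypothetical lists of grayscale
frame stacks:

    import numpy as np
    from scipy.stats import spearmanr

    def similarity_score(old_videos, new_videos):
        # One luminance histogram per video, as in calculate_similarity().
        n = min(len(old_videos), len(new_videos))
        old_hists = [np.histogram(v.flatten(), bins=255)[0] for v in old_videos[:n]]
        new_hists = [np.histogram(v.flatten(), bins=255)[0] for v in new_videos[:n]]

        # Cross-correlate every old/new pairing and average the matrix.
        xcorr = np.zeros((n, n))
        for i, histo in enumerate(old_hists):
            for j, histn in enumerate(new_hists):
                rho, _ = spearmanr(histn, histo)
                xcorr[i, j] = rho
        return np.round(np.mean(xcorr), 5)

A score near 1.0 means the new recording ranks pixel intensities the same way
the last live-site run did; the patch treats anything below 0.5 as suspicious
enough to copy the worst-matching video pair into the task's output directory.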
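A second note on the socket_timeout decorator: socket.setdefaulttimeout() is
process-global, so the decorator saves the old value and restores it in a
finally block, which keeps a hung ActiveData request from leaving the rest of
the vismet task with a changed timeout. A sketch of reusing it, where
fetch_last_task and the 30-second value are illustrative and not part of the
patch:

    import json
    import urllib.request

    from similarity import socket_timeout

    @socket_timeout(30)
    def fetch_last_task(url):
        # Every socket opened inside this call inherits the 30s default
        # timeout; the previous default is restored even if urlopen raises.
        with urllib.request.urlopen(url) as response:
            return json.loads(response.read().decode("utf-8"))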