From f4202971c006e6efdbb628ed2c3a6aea66c7ac05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 13:57:39 +0100 Subject: [PATCH 01/13] fix hydra evaluator --- config/services/hydra.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/config/services/hydra.nix b/config/services/hydra.nix index 2ae6a0be..f2726166 100644 --- a/config/services/hydra.nix +++ b/config/services/hydra.nix @@ -63,6 +63,7 @@ let --subst-var-by nix-eval-jobs ${nix-eval-jobs.packages.x86_64-linux.nix-eval-jobs}/bin/nix-eval-jobs \ --subst-var-by nix ${pkgs.nix}/bin/nix \ --subst-var-by ssh ${pkgs.openssh}/bin/ssh + chmod +x $out ''; }; in From 123d5674cb6507c3e60002f1f2eea2b8ea6bcde4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 14:02:28 +0100 Subject: [PATCH 02/13] use subprocess.run instead of subprocess.call --- config/services/hydra/remote-eval-jobs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index bb33f8b9..d1790527 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -6,7 +6,7 @@ import json # First check if the server is up -if subprocess.call(["@ping@", "-c", "1", "rainbow-resort.int.chir.rs"], stdout=subprocess.DEVNULL).returncode != 0: +if subprocess.run(["@ping@", "-c", "1", "rainbow-resort.int.chir.rs"], stdout=subprocess.DEVNULL).returncode != 0: os.execv("@nix-eval-jobs@", ["@nix-eval-jobs@"] + sys.argv[1:]) inputs_to_copy = set() @@ -41,16 +41,16 @@ remote_args += ["--workers" "4"] # copy over what files we need to ensure are present on the target -subprocess.call(["@nix@", "copy"] + list(inputs_to_copy) + ["--to", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) +subprocess.run(["@nix@", "copy"] + list(inputs_to_copy) + ["--to", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) # Evaluate on target -result = subprocess.call(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + remote_args, check=True, stdout=subprocess.PIPE, text=True) +result = subprocess.run(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + remote_args, check=True, stdout=subprocess.PIPE, text=True) for line in result.stdout: try: data = json.loads(line) # copy .drv file home - subprocess.call(["@nix@", "copy", data["drvPath"], "--from", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["@nix@", "copy", data["drvPath"], "--from", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) # if we have a gcroot, add it to it if gcroots is not None: drvBasename = os.path.basename(data["drvPath"]) From 5706677a7a62d536de536732ed7a6826fb7f5d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 14:26:24 +0100 Subject: [PATCH 03/13] fix hydra remote eval job --- config/services/hydra/remote-eval-jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index d1790527..7815683c 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -25,7 +25,7 @@ for arg in sys.argv[1:]: if arg == "--gc-roots-dir": next_to_gcroots = True if next_to_gcroots: - next_to_gcroots = false + next_to_gcroots = False gcroots = arg if skip_next > 0: skip_next -= 1 From 990d315d7b7ecf08a0783900b34cf00aa283e1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 14:35:12 +0100 Subject: [PATCH 04/13] add appropriate secrets --- config/services/hydra.nix | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config/services/hydra.nix b/config/services/hydra.nix index f2726166..e907ff84 100644 --- a/config/services/hydra.nix +++ b/config/services/hydra.nix @@ -185,6 +185,12 @@ in }; }; nix.settings.trusted-users = [ "@hydra" ]; + sops.secrets."hydra-evaluator/ssh/builder_id_ed25519" = { + sopsFile = ../../secrets/shared.yaml; + owner = "hydra"; + key = "ssh/builder_id_ed25519"; + path = "/var/lib/hydra/.ssh/builder_id_ed25519"; + }; sops.secrets."hydra/ssh/builder_id_ed25519" = { sopsFile = ../../secrets/shared.yaml; owner = "hydra-queue-runner"; @@ -195,6 +201,9 @@ in mkdir -p /var/lib/hydra/queue-runner/.ssh/ chown -Rv hydra-queue-runner /var/lib/hydra/queue-runner ln -svf ${sshConfig} /var/lib/hydra/queue-runner/.ssh/config + mkdir -p /var/lib/hydra/.ssh/ + chown -Rv hydra /var/lib/hydra/.ssh + ln -svf ${sshConfig} /var/lib/hydra/.ssh/config ''; sops.secrets."attic/config.toml" = { owner = "hydra-queue-runner"; From d0d671f3a9e9fe5ac1458f5b9d1ca45d6035d245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 14:40:07 +0100 Subject: [PATCH 05/13] use the correct key path for hydra-evaluator --- config/services/hydra.nix | 78 ++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/config/services/hydra.nix b/config/services/hydra.nix index e907ff84..d9de8fcb 100644 --- a/config/services/hydra.nix +++ b/config/services/hydra.nix @@ -14,43 +14,45 @@ let build-aarch64 aarch64-linux,riscv32-linux,riscv64-linux - 4 1 nixos-test,benchmark,ca-derivations,gccarch-armv8-a,gccarch-armv8.1-a,gccarch-armv8.2-a,big-parallel - build-riscv riscv64-linux,riscv32-linux - 4 2 nixos-test,benchmark,ca-derivations,gccarch-rv64gc_zba_zbb,gccarch-rv64gc_zba,gccarch-rv64gc_zbb,ccarch-rv64gc,gccarch-rv32gc_zba_zbb,gccarch-rv32gc_zba,gccarch-rv32gc_zbb,gccarch-rv32gc,big-parallel,native-riscv - ''; - sshConfig = pkgs.writeText "ssh-config" '' - Host build-aarch64 - Port 22 - IdentitiesOnly yes - User remote-build - HostName instance-20221213-1915.int.chir.rs - IdentityFile /var/lib/hydra/queue-runner/.ssh/builder_id_ed25519 - Host build-nas - Port 22 - IdentitiesOnly yes - User remote-build - HostName nas.int.chir.rs - IdentityFile /var/lib/hydra/queue-runner/.ssh/builder_id_ed25519 - Host build-rainbow-resort - Port 22 - IdentitiesOnly yes - User remote-build - HostName rainbow-resort.int.chir.rs - IdentityFile /var/lib/hydra/queue-runner/.ssh/builder_id_ed25519 - Host build-riscv - Port 22 - IdentitiesOnly yes - User remote-build - HostName not522.tailbab65.ts.net - IdentityFile /var/lib/hydra/queue-runner/.ssh/builder_id_ed25519 + sshConfig = + home: + pkgs.writeText "ssh-config" '' + Host build-aarch64 + Port 22 + IdentitiesOnly yes + User remote-build + HostName instance-20221213-1915.int.chir.rs + IdentityFile ${home}/.ssh/builder_id_ed25519 + Host build-nas + Port 22 + IdentitiesOnly yes + User remote-build + HostName nas.int.chir.rs + IdentityFile ${home}/.ssh/builder_id_ed25519 + Host build-rainbow-resort + Port 22 + IdentitiesOnly yes + User remote-build + HostName rainbow-resort.int.chir.rs + IdentityFile ${home}/.ssh/builder_id_ed25519 + Host build-riscv + Port 22 + IdentitiesOnly yes + User remote-build + HostName not522.tailbab65.ts.net + IdentityFile ${home}/.ssh/builder_id_ed25519 - Host * - ForwardAgent no - Compression no - ServerAliveInterval 0 - ServerAliveCountMax 3 - HashKnownHosts no - UserKnownHostsFile ~/.ssh/known_hosts - ControlMaster auto - ControlPath ~/.ssh/master-%r@%n:%p - ControlPersist 10m - ''; + Host * + ForwardAgent no + Compression no + ServerAliveInterval 0 + ServerAliveCountMax 3 + HashKnownHosts no + UserKnownHostsFile ~/.ssh/known_hosts + ControlMaster auto + ControlPath ~/.ssh/master-%r@%n:%p + ControlPersist 10m + ''; nix-eval-jobs-script = pkgs.stdenvNoCC.mkDerivation { name = "remote-eval-jobs.py"; src = ./hydra/remote-eval-jobs.py; @@ -200,10 +202,10 @@ in system.activationScripts.setupHydraSshConfig = lib.stringAfter [ "var" ] '' mkdir -p /var/lib/hydra/queue-runner/.ssh/ chown -Rv hydra-queue-runner /var/lib/hydra/queue-runner - ln -svf ${sshConfig} /var/lib/hydra/queue-runner/.ssh/config + ln -svf ${sshConfig "/var/lib/hydra/queue-runner"} /var/lib/hydra/queue-runner/.ssh/config mkdir -p /var/lib/hydra/.ssh/ chown -Rv hydra /var/lib/hydra/.ssh - ln -svf ${sshConfig} /var/lib/hydra/.ssh/config + ln -svf ${sshConfig "/var/lib/hydra"} /var/lib/hydra/.ssh/config ''; sops.secrets."attic/config.toml" = { owner = "hydra-queue-runner"; From 4f4fc732faab0ad606a4ee2426f029734f678feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 14:56:55 +0100 Subject: [PATCH 06/13] fix shell quoting --- config/services/hydra/remote-eval-jobs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index 7815683c..c770d995 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -3,6 +3,7 @@ import sys import subprocess import os import json +import shlex # First check if the server is up @@ -44,7 +45,7 @@ remote_args += ["--workers" "4"] subprocess.run(["@nix@", "copy"] + list(inputs_to_copy) + ["--to", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) # Evaluate on target -result = subprocess.run(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + remote_args, check=True, stdout=subprocess.PIPE, text=True) +result = subprocess.run(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + list(map(shlex.quote, remote_args)), check=True, stdout=subprocess.PIPE, text=True) for line in result.stdout: try: From d08f1d31a7dc6c68223ccf72b358e12b19582b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 15:11:44 +0100 Subject: [PATCH 07/13] fix workers args --- config/services/hydra/remote-eval-jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index c770d995..2661bfa7 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -38,7 +38,7 @@ for arg in sys.argv[1:]: next_to_copy = True remote_args.append(arg) -remote_args += ["--workers" "4"] +remote_args += ["--workers", "4"] # copy over what files we need to ensure are present on the target From 58486b6d680f09b85f975a22a292215b7e737150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 15:28:15 +0100 Subject: [PATCH 08/13] fix more --- config/services/hydra/remote-eval-jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index 2661bfa7..ccae839f 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -38,7 +38,7 @@ for arg in sys.argv[1:]: next_to_copy = True remote_args.append(arg) -remote_args += ["--workers", "4"] +remote_args += ["--workers", "4", "--gc-roots-dir", "/tmp"] # copy over what files we need to ensure are present on the target @@ -58,5 +58,5 @@ for line in result.stdout: os.symlink(data["drvPath"], os.path.join(gcroots, drvBasename)) # Now we are done with this job, we can tell hydra about it print(line) - except e: + except Exception as e: print(e, file=sys.stderr) \ No newline at end of file From 5cf0bf91c008f1e8f63a76ca8d2138a2e8dfe7f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 15:31:05 +0100 Subject: [PATCH 09/13] read the stdout output line by line --- config/services/hydra/remote-eval-jobs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index ccae839f..c3dc34b9 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -40,14 +40,14 @@ for arg in sys.argv[1:]: remote_args += ["--workers", "4", "--gc-roots-dir", "/tmp"] -# copy over what files we need to ensure are present on the target - -subprocess.run(["@nix@", "copy"] + list(inputs_to_copy) + ["--to", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) +if len(inputs_to_copy) != 0: + # copy over what files we need to ensure are present on the target + subprocess.run(["@nix@", "copy"] + list(inputs_to_copy) + ["--to", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) # Evaluate on target result = subprocess.run(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + list(map(shlex.quote, remote_args)), check=True, stdout=subprocess.PIPE, text=True) -for line in result.stdout: +for line in iter(result.stdout.readline, ""): try: data = json.loads(line) # copy .drv file home From 1ef23e9750264d2b685ff169d0c575251d992f1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 15:47:38 +0100 Subject: [PATCH 10/13] linebuffer --- config/services/hydra/remote-eval-jobs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index c3dc34b9..c8d0489d 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -45,7 +45,7 @@ if len(inputs_to_copy) != 0: subprocess.run(["@nix@", "copy"] + list(inputs_to_copy) + ["--to", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL) # Evaluate on target -result = subprocess.run(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + list(map(shlex.quote, remote_args)), check=True, stdout=subprocess.PIPE, text=True) +result = subprocess.Popen(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + list(map(shlex.quote, remote_args)), bufsize=1, stdout=subprocess.PIPE, text=True) for line in iter(result.stdout.readline, ""): try: @@ -59,4 +59,6 @@ for line in iter(result.stdout.readline, ""): # Now we are done with this job, we can tell hydra about it print(line) except Exception as e: - print(e, file=sys.stderr) \ No newline at end of file + print(e, file=sys.stderr) + +sys.exit(result.wait()) \ No newline at end of file From 90cb5d2e0fdf0a9d98242f27fe6f53759c2059a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 15:47:54 +0100 Subject: [PATCH 11/13] move gcroots arg up --- config/services/hydra/remote-eval-jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index c8d0489d..73b84ac1 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -23,11 +23,11 @@ gcroots = None for arg in sys.argv[1:]: if arg == "--gc-roots-dir" or arg == "--max-jobs" or arg == "--workers": skip_next = 2 - if arg == "--gc-roots-dir": - next_to_gcroots = True if next_to_gcroots: next_to_gcroots = False gcroots = arg + if arg == "--gc-roots-dir": + next_to_gcroots = True if skip_next > 0: skip_next -= 1 continue From 54add2bd4e38f12089d7ad20d0496862981ece14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 16:02:11 +0100 Subject: [PATCH 12/13] ignore existing symlinks --- config/services/hydra/remote-eval-jobs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index 73b84ac1..674caed3 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -55,7 +55,10 @@ for line in iter(result.stdout.readline, ""): # if we have a gcroot, add it to it if gcroots is not None: drvBasename = os.path.basename(data["drvPath"]) - os.symlink(data["drvPath"], os.path.join(gcroots, drvBasename)) + try: + os.symlink(data["drvPath"], os.path.join(gcroots, drvBasename)) + except: + pass # Now we are done with this job, we can tell hydra about it print(line) except Exception as e: From 76b88ee189a2a9289895aa62afd4beead9f0d389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Charlotte=20=F0=9F=A6=9D=20Delenk?= Date: Sun, 1 Dec 2024 16:03:23 +0100 Subject: [PATCH 13/13] strip line --- config/services/hydra/remote-eval-jobs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/config/services/hydra/remote-eval-jobs.py b/config/services/hydra/remote-eval-jobs.py index 674caed3..bc4be8fc 100644 --- a/config/services/hydra/remote-eval-jobs.py +++ b/config/services/hydra/remote-eval-jobs.py @@ -49,6 +49,7 @@ result = subprocess.Popen(["@ssh@", "build-rainbow-resort", "nix-eval-jobs"] + l for line in iter(result.stdout.readline, ""): try: + line = line.strip() data = json.loads(line) # copy .drv file home subprocess.run(["@nix@", "copy", data["drvPath"], "--from", "ssh://build-rainbow-resort", "--no-check-sigs"], check=True, stdout=subprocess.DEVNULL)