Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
# File: .github/clean-equinix-runner/action.yaml (new file: 24 additions, 0 deletions)
# Composite action that deletes one Equinix Metal runner, identified by name,
# from the given Equinix project.
name: Clean Equinix Runner
description: Deletes a given Equinix runner
inputs:
  runner_name:
    description: Name of the runner to be deleted
    required: true
  project_id:
    description: Project ID of the Equinix project
    required: true
  api_token:
    description: API token of the Equinix project
    required: true
runs:
  using: composite
  steps:
    - name: Delete runner
      # Using custom equinix metal sweeper action instead of official one,
      # as the official action does not support deleting a specific runner.
      # See: https://github.com/sustainable-computing-io/metal-sweeper-action/pull/1
      #
      # NOTE(review): `@main` is a mutable ref and this step receives the
      # Equinix API token; consider pinning to a reviewed commit SHA.
      uses: sustainable-computing-io/metal-sweeper-action@main
      with:
        authToken: ${{ inputs.api_token }}
        projectID: ${{ inputs.project_id }}
        runnerName: ${{ inputs.runner_name }}
# File: .github/create-equinix-runner/action.yaml (new file: 34 additions, 0 deletions)
# Composite action that provisions an Equinix Metal machine and registers it
# as a self-hosted GitHub runner for the sustainable-computing-io organization.
# Metro (`da`) and plan (`c3.small.x86`) are fixed; only the OS is configurable.
name: Create Equinix Runner
description: Creates a new Equinix runner for a given OS
inputs:
  os_name:
    description: Name of the OS to be requested for the runner
    required: true
  github_token:
    description: GitHub token
    required: true
  metal_auth_token:
    description: Equinix API token
    required: true
  metal_project_id:
    description: Equinix project ID
    required: true

runs:
  using: composite
  steps:
    - name: Create runner
      # Using custom equinix metal runner action instead of official one,
      # as the official action does not support passing organization name parameter.
      # This is necessary for workflows triggered by events like `issue_comment`, where
      # the `github.organization` context is not available.
      # See: https://github.com/sustainable-computing-io/metal-runner-action/pull/1
      #
      # NOTE(review): `@main` is a mutable ref and this step receives the
      # GitHub and Equinix tokens; consider pinning to a reviewed commit SHA.
      uses: sustainable-computing-io/metal-runner-action@main
      with:
        github_token: ${{ inputs.github_token }}
        metal_auth_token: ${{ inputs.metal_auth_token }}
        metal_project_id: ${{ inputs.metal_project_id }}
        metro: da
        plan: c3.small.x86
        os: ${{ inputs.os_name }}
        organization: sustainable-computing-io
# File: .github/workflows/k8s-equinix.yaml (new file: 189 additions, 0 deletions)
# Provisions an ephemeral Equinix Metal self-hosted runner, builds the image,
# deploys it to a Kind cluster on that runner, validates the Kepler metrics
# endpoint, and finally deletes the runner again.
name: Build and Deploy K8s on Equinix

on:  # yamllint disable-line rule:truthy
  pull_request:
    branches: [reboot]
  # GitHub only runs `issue_comment` workflows from the workflow file on the
  # default branch, so the comment trigger stays disabled until this workflow
  # is available there.
  # issue_comment:
  #   types: [created]

# NOTE(review): no step below visibly needs write access through GITHUB_TOKEN;
# consider narrowing these once the comment-triggered flow is settled.
permissions:
  pull-requests: write
  contents: write

jobs:
  # Job 1: create the Equinix machine that registers itself as a self-hosted runner.
  create-runner:
    # TODO: Uncomment once we enable workflow on issue_comment
    # if: github.event.issue.pull_request && contains(github.event.comment.body, '/test-equinix')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout source
        uses: actions/checkout@v4
        with:
          # The workflow is triggered by `pull_request`, so the head SHA lives
          # under `github.event.pull_request` (`github.event.issue` is unset).
          # When `issue_comment` is enabled, the SHA has to be resolved via the
          # API because that payload does not carry it; an empty value makes
          # checkout fall back to its default ref.
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Create Equinix runner
        uses: ./.github/create-equinix-runner
        with:
          github_token: ${{ secrets.GH_SELF_HOSTED_RUNNER_TOKEN }}
          metal_auth_token: ${{ secrets.EQUINIX_API_TOKEN }}
          metal_project_id: ${{ secrets.EQUINIX_PROJECT_ID }}
          os_name: ubuntu_22_04

  # Job 2: runs on the freshly created runner; installs tooling, builds,
  # deploys and validates Kepler.
  build-and-deploy:
    needs: create-runner
    runs-on: self-hosted
    env:
      GOCACHE: /home/ghrunner/.cache/go-build
      HOME: /home/ghrunner
    outputs:
      # Exposed so that cleanup-runner knows which runner to delete.
      runner_name: ${{ runner.name }}
    steps:
      - name: Install Docker
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common
          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
          echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list
          sudo apt-get update
          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
          sudo usermod -aG docker root

      - name: Verify Docker installation
        shell: bash
        run: |
          docker ps
          docker --version

      - name: Install Kind
        shell: bash
        run: |
          # -f: fail on HTTP errors instead of saving an error page as the binary
          curl -fLo ./kind https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-amd64
          chmod +x ./kind
          sudo mv ./kind /usr/local/bin/kind

      - name: Verify Kind installation
        shell: bash
        run: |
          kind version

      - name: Install Kubectl
        shell: bash
        run: |
          # Installs whatever version stable.txt currently points to.
          curl -fLO "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x ./kubectl
          sudo mv ./kubectl /usr/local/bin/kubectl

      - name: Verify Kubectl installation
        shell: bash
        run: |
          kubectl version --client

      - name: Checkout source
        uses: actions/checkout@v4
        with:
          # See the note in create-runner: pull_request payload carries the head SHA here.
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0

      - name: Setup Go
        uses: actions/setup-go@v5.4.0
        with:
          go-version-file: go.mod
          cache: false

      - name: Build image
        shell: bash
        run: |
          make image
        env:
          IMG_BASE: localhost:5001
          VERSION: dev

      - name: Setup Kind cluster
        shell: bash
        run: |
          make cluster-up

      - name: Push image
        shell: bash
        run: |
          make push
        env:
          IMG_BASE: localhost:5001
          VERSION: dev

      - name: Deploy Kepler
        shell: bash
        run: |
          make deploy
        env:
          IMG_BASE: localhost:5001
          VERSION: dev

      - name: Verify Kepler deployment
        shell: bash
        run: |
          kubectl rollout status daemonset/kepler -n kepler --timeout=5m

      # TODO: Move this once we add validator tool to the repo
      - name: Validate metric endpoint
        id: validate
        shell: bash
        run: |
          kubectl port-forward service/kepler 28282:28282 -n kepler &
          sleep 20 # sleep for 20 seconds to give the service time to start

          # `|| true` keeps `set -e` from aborting on a connection error, so the
          # status check below still reports the failure.
          HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:28282/metrics || true)
          if [[ "$HTTP_STATUS" != "200" ]]; then
            echo "HTTP status code is not 200"
            exit 1
          fi

          curl -s -o /tmp/metrics.txt http://localhost:28282/metrics

          # Every metric name listed here must appear in the scraped output.
          for metric in kepler_process_cpu_watts \
                        kepler_node_cpu_info \
                        kepler_process_cpu_joules_total \
                        kepler_container_cpu_joules_total \
                        kepler_container_cpu_watts; do
            echo "Checking metric: $metric"
            if ! grep -q "$metric" /tmp/metrics.txt; then
              echo "Metric $metric not found"
              exit 1
            fi
          done

      # Best-effort diagnostics; every command is allowed to fail.
      - name: Run must gather
        if: failure()
        shell: bash
        run: |
          echo "::group::Get pods in kepler namespace"
          kubectl get pods -n kepler || true
          echo "::endgroup::"

          echo "::group::Get pods in monitoring namespace"
          kubectl get pods -n monitoring || true
          echo "::endgroup::"

          echo "::group::Get logs for kepler daemonset"
          kubectl logs daemonset/kepler -n kepler || true
          echo "::endgroup::"

          echo "::group::Fetch metrics from localhost:28282"
          curl -s http://localhost:28282/metrics || true
          echo "::endgroup::"

  # Job 3: always delete the runner so that machines are not leaked.
  cleanup-runner:
    needs: build-and-deploy
    runs-on: ubuntu-latest
    if: always() # Run even if the previous job fails
    steps:
      - name: Checkout source
        uses: actions/checkout@v4
        with:
          # See the note in create-runner: pull_request payload carries the head SHA here.
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Clean Equinix runner
        uses: ./.github/clean-equinix-runner
        with:
          # NOTE(review): this is empty when build-and-deploy was skipped or never
          # started; confirm how the sweeper action treats an empty runner name.
          runner_name: ${{ needs.build-and-deploy.outputs.runner_name }}
          project_id: ${{ secrets.EQUINIX_PROJECT_ID }}
          api_token: ${{ secrets.EQUINIX_API_TOKEN }}