Blog

Setting up Forgejo and Forgejo Actions with `Docker Compose`, with `Nix`-based actions

Docker Compose Configuration

# Traefik reverse proxy: declare a dedicated TCP entrypoint so Forgejo's SSH
# (git clone/push over ssh://) can be routed through Traefik on port 222.
traefik:
  # ...
  command:
    # ...
    # TCP entrypoint named "ssh" listening on port 222.
    - "--entrypoints.ssh.address=:222"
  ports:
    # ...
    # Quoted on purpose: unquoted colon-separated digits hit YAML 1.1's
    # sexagesimal-integer parsing.
    - "222:222"
# Forgejo git forge, published through Traefik over HTTPS (internal port
# 3000) and over SSH via the `ssh` TCP entrypoint (internal port 22).
forgejo:
    container_name: forgejo
    image: codeberg.org/forgejo/forgejo:11
    environment:
        - USER_UID=1000
        - USER_GID=1000
        # Database settings use Forgejo's FORGEJO__section__KEY env format
        # and must match the pgforgejo service below.
        - FORGEJO__database__DB_TYPE=postgres
        - FORGEJO__database__HOST=pgforgejo:5432
        - FORGEJO__database__NAME=forgejo
        - FORGEJO__database__USER=forgejo
        # NOTE(review): hard-coded credentials — fine for a demo; move to a
        # secrets/env file for production.
        - FORGEJO__database__PASSWD=forgejo
    restart: always
    networks:
        - forgejo
        - <network name>
    volumes:
        - ./forgejo:/data
        - /etc/timezone:/etc/timezone:ro
        - /etc/localtime:/etc/localtime:ro
    # Ports are not published directly; Traefik handles both HTTP(S) and SSH.
    # ports:
    #     - "3000:3000"
    #     - "222:22"
    depends_on:
        - pgforgejo
    labels:
        - "traefik.enable=true"
        # HTTPS router.
        - "traefik.http.routers.forgejo.rule=Host(`git.example.com`)"
        - "traefik.http.routers.forgejo.entrypoints=websecure"
        - "traefik.http.routers.forgejo.tls.certresolver=myhttpchallenge"
        - "traefik.http.routers.forgejo.service=forgejo"

        # HTTP router whose only job is redirecting to HTTPS.
        - "traefik.http.routers.forgejo-http.rule=Host(`git.example.com`)"
        - "traefik.http.routers.forgejo-http.entrypoints=web"
        - "traefik.http.routers.forgejo-http.middlewares=forgejo-redirect"
        - "traefik.http.middlewares.forgejo-redirect.redirectscheme.scheme=https"
        - "traefik.http.middlewares.forgejo-redirect.redirectscheme.permanent=true"

        # Forward HTTP traffic to Forgejo's internal web port.
        - "traefik.http.services.forgejo.loadbalancer.server.port=3000"
        - "traefik.docker.network=<network name>"
        # SSH is raw TCP, so it needs a TCP router; HostSNI(`*`) matches all
        # traffic on the `ssh` entrypoint (SSH carries no SNI).
        - "traefik.tcp.routers.forgejo-ssh.entrypoints=ssh"
        - "traefik.tcp.routers.forgejo-ssh.rule=HostSNI(`*`)"
        - "traefik.tcp.routers.forgejo-ssh.service=forgejo-ssh"
        - "traefik.tcp.services.forgejo-ssh.loadbalancer.server.port=22"

# PostgreSQL backing database for Forgejo; reachable only on the internal
# `forgejo` network (no published ports).
pgforgejo:
    container_name: pgforgejo
    image: postgres:17.6-alpine
    restart: always
    environment:
        # NOTE(review): these credentials must match the FORGEJO__database__*
        # values on the forgejo service; use secrets in production.
        - POSTGRES_USER=forgejo
        - POSTGRES_PASSWORD=forgejo
        - POSTGRES_DB=forgejo
    networks:
        - forgejo
    volumes:
        # Persist the database on the host next to the compose file.
        - ./pgforgejo:/var/lib/postgresql/data

# Docker-in-Docker daemon that the Forgejo runner uses to start job
# containers, isolating them from the host's Docker daemon.
docker-in-docker:
    container_name: docker-dind
    image: docker:dind
    # `privileged` is a boolean in the Compose spec — use a real YAML
    # boolean, not the string "true".
    privileged: true
    # WARNING: the daemon listens on plain TCP with TLS disabled. This is
    # only acceptable because port 2375 is reachable solely from the
    # internal `forgejo` network; never publish it to the host.
    command: ["dockerd", "-H", "tcp://0.0.0.0:2375", "--tls=false"]
    restart: unless-stopped
    networks:
        - forgejo

# Forgejo Actions runner; talks to the DinD daemon above to spawn job
# containers.
forgejo-action:
    container_name: "forgejo-action"
    image: "data.forgejo.org/forgejo/runner:9"
    # NOTE(review): `links` is legacy Compose syntax; name resolution already
    # works via the shared `forgejo` network, so this only adds a hosts alias.
    links:
        - docker-in-docker
    depends_on:
        docker-in-docker:
            condition: service_started
    environment:
        # Point the runner's Docker client at the DinD daemon.
        DOCKER_HOST: tcp://docker-in-docker:2375
    networks:
        - forgejo
    # User without root privileges, but with access to `./data`.
    # Quoted: unquoted colon-separated digits are parsed as a sexagesimal
    # integer by YAML 1.1 parsers.
    user: "1001:1001"
    volumes:
        - ./forgejo-data:/data
    restart: unless-stopped
    # Debug alternative that keeps the container alive without the runner:
    # command: '/bin/sh -c "while : ; do sleep 1 ; done ;"'
    # Wait briefly for DinD to come up, then start the runner daemon.
    command: '/bin/sh -c "sleep 5; forgejo-runner --config config.yaml daemon"'

Forgejo Actions

docker exec -it forgejo-action /bin/sh

forgejo-runner register --no-interactive --token {TOKEN} --name runner --instance https://git.example.com --labels bookworm:docker://node:24-bookworm

forgejo-runner generate-config > config.yaml
# config.yaml
# Always pull and rebuild job container images so runs pick up image updates.
container:
    force_pull: true
    force_rebuild: true
# .runner
{
  "WARNING": "This file is automatically generated by act-runner. Do not edit it manually unless you know what you are doing. Removing this file will cause act runner to re-register as a new runner.",
  "id": 1,
  "uuid": "****",
  "name": "<runner name>",
  "token": "****",
  "address": "https://git.example.com",
  "labels": [
    "bookworm:docker://node:24-bookworm",
    "nix-base:docker://docker.nix-community.org/nixpkgs/nix-unstable:latest",
    "nix:docker://git.nexveridian.com/nexveridian/action-attic:latest"
  ]
}

Available runner images

UT Austin Class Schedule

Semester | Course Name | Category
2025 Fall | Case Studies in Machine Learning | Elective
2025 Fall | Deep Learning | Application
2026 Spring | Advances in Deep Learning | Elective
2026 Spring | Android Programming | Systems
2026 Fall | Parallel Systems | Systems
2027 Spring | Automated Logical Reasoning | Theory

Using `serde_json` or `serde` data, in `datafusion`

Getting data into `datafusion` is not well documented, especially when starting from `serde_json` or other `serde` data.

This example shows how to convert a serde_json::Value::Array into a datafusion DataFrame, manipulate the dataframe in datafusion, then convert it back to serde_json.

# Cargo.toml
datafusion = "47.0.0"
serde_arrow = { version = "0.13.3", features = ["arrow-55"] }
// `serde_json::Value`
// `serde_json::Value` holding an array of records.
let json = serde_json::json!([{
    "date": "2025-06-05",
    "test": "test",
    "price": 1.01,
}]);

let ctx = SessionContext::new();

// Only a JSON array can become a table; bail out on any other variant.
let serde_json::Value::Array(json_array) = &json else {
    return Err(anyhow::anyhow!("Expected JSON array, got different type"));
};

if json_array.is_empty() {
    return Ok(Vec::new());
}

// Configure `TracingOptions` to allow null fields and coerce numbers.
let tracing_options = TracingOptions::default()
    .allow_null_fields(true)
    .coerce_numbers(true);

// Infer the Arrow schema from the actual data, using samples, with
// `TracingOptions`.
let fields = Vec::<FieldRef>::from_samples(json_array, tracing_options)?;

// Convert `serde_json::Value::Array` to `RecordBatch` using `serde_arrow`.
let record_batch = serde_arrow::to_record_batch(&fields, &json_array)?;

// Create a DataFrame from the `RecordBatch`.
let mut df = ctx.read_batch(record_batch)?;

// Add a new column `new_col` using the DataFrame API.
df = df.with_column("new_col", lit("test".to_string()))?;

// Execute the DataFrame query.
let result_batches = df.collect().await?;

// Convert back to `serde_json` using `serde_arrow`.
let all_json_values = result_batches
    .into_iter()
    .flat_map(|batch| {
        serde_arrow::from_record_batch(&batch).unwrap_or_else(|_| Vec::new())
    })
    .collect::<Vec<serde_json::Value>>();

// `PartialEq` is required for the `assert_eq!` comparison below.
#[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct TestData {
    date: String,
    test: String,
    price: f64,
    new_col: String,
}

// Convert the `serde_json::Value` to Vec<TestData>.
let test_data: Vec<TestData> =
    serde_json::from_value(serde_json::Value::Array(all_json_values))?;

// The macro is `vec!` (lowercase); `Vec!` does not exist.
assert_eq!(
    test_data,
    vec![
        TestData {
            date: "2025-06-05".to_string(),
            test: "test".to_string(),
            price: 1.01,
            new_col: "test".to_string(),
        },
    ]
);

Or you can use this `datafusion_ext` module

// src/utils/datafusion_ext.rs
use anyhow::Error;
use datafusion::{arrow::datatypes::FieldRef, dataframe::DataFrame, prelude::*};
use serde_arrow::schema::{SchemaLike, TracingOptions};

pub trait JsonValueExt {
    /// Converts a `serde_json::Value::Array` into a `datafusion::dataframe`
    ///
    /// Errors if `self` is not `Value::Array` or if the array is empty.
    fn to_df(&self) -> Result<DataFrame, Error>;
}

impl JsonValueExt for serde_json::Value {
    /// Builds a single `RecordBatch` from the JSON array and registers it
    /// with a fresh `SessionContext` as a `DataFrame`.
    fn to_df(&self) -> Result<DataFrame, Error> {
        // Reject anything that is not a JSON array up front.
        let records = match self {
            Self::Array(records) => records,
            _ => {
                return Err(anyhow::anyhow!(
                    "Expected `serde_json::Value::Array`, got different type"
                ));
            }
        };

        if records.is_empty() {
            return Err(anyhow::anyhow!("Empty `serde_json::Value::Array` provided"));
        }

        // Infer the Arrow schema from the data itself, tolerating null
        // fields and mixed numeric representations.
        let tracing_options = TracingOptions::default()
            .allow_null_fields(true)
            .coerce_numbers(true);
        let fields = Vec::<FieldRef>::from_samples(records, tracing_options)?;

        // Convert the rows into one batch and expose it as a DataFrame.
        let batch = serde_arrow::to_record_batch(&fields, &records)?;
        let ctx = SessionContext::new();
        Ok(ctx.read_batch(batch)?)
    }
}

#[async_trait::async_trait]
pub trait DataFrameExt {
    /// Collects a `datafusion::dataframe` and deserializes it to a Vec of the
    /// specified type
    ///
    /// `T` must be deserializable and match the frame's column names/types.
    async fn to_vec<T>(&self) -> Result<Vec<T>, Error>
    where
        T: serde::de::DeserializeOwned;
}

#[async_trait::async_trait]
impl DataFrameExt for DataFrame {
    /// Collects the frame, round-trips each `RecordBatch` through
    /// `serde_arrow`, and deserializes the rows into `Vec<T>`.
    ///
    /// # Errors
    /// Fails if query execution fails, if a batch cannot be converted to
    /// JSON values, or if deserialization into `T` fails.
    async fn to_vec<T>(&self) -> Result<Vec<T>, Error>
    where
        T: serde::de::DeserializeOwned,
    {
        let result_batches = self.clone().collect().await?;

        // Propagate conversion errors instead of silently replacing a failed
        // batch with an empty Vec (the previous `unwrap_or_else` hid data
        // loss from callers).
        let mut all_json_values = Vec::new();
        for batch in &result_batches {
            let values: Vec<serde_json::Value> = serde_arrow::from_record_batch(batch)?;
            all_json_values.extend(values);
        }

        let typed_result: Vec<T> =
            serde_json::from_value(serde_json::Value::Array(all_json_values))?;

        Ok(typed_result)
    }
}
use utils::datafusion_ext::{DataFrameExt, JsonValueExt};

let json = serde_json::json!([{
    "date": "2025-06-05",
    "test": "test",
    "price": 1.01,
}]);

// Convert the JSON array into a DataFrame via the extension trait.
let mut df = json.to_df()?;

df = df.with_column("new_col", lit("test".to_string()))?;

// `PartialEq` is required for the `assert_eq!` comparison below.
#[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct TestData {
    date: String,
    test: String,
    price: f64,
    new_col: String,
}

// Bind the collected rows to the same name the assertion uses (the original
// snippet bound to `etfs` but asserted on an undefined `test_data`).
let test_data = df.to_vec::<TestData>().await?;

// The macro is `vec!` (lowercase); `Vec!` does not exist.
assert_eq!(
    test_data,
    vec![
        TestData {
            date: "2025-06-05".to_string(),
            test: "test".to_string(),
            price: 1.01,
            new_col: "test".to_string(),
        },
    ]
);