refactor(modules): reduce host repetition

This commit is contained in:
2026-03-28 10:52:59 +00:00
parent 1bb97448a4
commit 94baea90d6
47 changed files with 876 additions and 722 deletions

View File

@@ -1,69 +0,0 @@
# AdGuard Home DNS filtering plus a tailnet-only Caddy front end for its UI.
{config, ...}: {
services.adguardhome = {
enable = true;
# Admin UI binds to loopback only; it is reached through the Caddy vhost below.
host = "127.0.0.1";
port = 10000;
settings = {
dhcp = {
enabled = false;
};
dns = {
# Cloudflare resolvers as upstreams.
upstream_dns = [
"1.1.1.1"
"1.0.0.1"
];
};
filtering = {
protection_enabled = true;
filtering_enabled = true;
safe_search = {
enabled = false;
};
safebrowsing_enabled = true;
blocked_response_ttl = 10;
filters_update_interval = 24;
blocked_services = {
ids = [
"reddit"
"twitter"
];
};
};
# HaGeZi blocklists pulled via jsDelivr.
filters = [
{
enabled = true;
url = "https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/adblock/pro.txt";
name = "HaGeZi Multi PRO";
id = 1;
}
{
enabled = true;
url = "https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/adblock/tif.txt";
name = "HaGeZi Threat Intelligence Feeds";
id = 2;
}
{
enabled = true;
url = "https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/adblock/gambling.txt";
name = "HaGeZi Gambling";
id = 3;
}
{
enabled = true;
url = "https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/adblock/nsfw.txt";
name = "HaGeZi NSFW";
id = 4;
}
];
};
};
# Publish the UI on the tailnet with a Tailscale-issued certificate; the
# upstream port is derived from the service option so the two stay in sync.
services.caddy.virtualHosts."adguard.manticore-hippocampus.ts.net" = {
extraConfig = ''
tls {
get_certificate tailscale
}
reverse_proxy localhost:${toString config.services.adguardhome.port}
'';
};
}

View File

@@ -1,10 +0,0 @@
# Tailnet vhost proxying to a local service on port 32843.
# NOTE(review): the service behind 32843 is defined elsewhere — presumably a
# binary cache, judging from the hostname; confirm against the host config.
{...}: {
services.caddy.virtualHosts."cache.manticore-hippocampus.ts.net" = {
extraConfig = ''
tls {
get_certificate tailscale
}
reverse_proxy localhost:32843
'';
};
}

View File

@@ -1,128 +0,0 @@
# Notability ingest pipeline: a WebDAV landing zone (notability-webdav) plus a
# watcher (notability-watch) that archives uploads and enqueues note jobs.
{
config,
inputs',
pkgs,
...
}: let
homeDir = "/home/cschmatzler";
notabilityScripts = ./notability;
dataRoot = "${homeDir}/.local/share/notability-ingest";
stateRoot = "${homeDir}/.local/state/notability-ingest";
notesRoot = "${homeDir}/Notes";
webdavRoot = "${dataRoot}/webdav-root";
# Tools the interactive user also needs; reused in the services' PATH below.
userPackages = with pkgs; [
qmd
poppler-utils
rclone
sqlite
zk
];
commonPath = with pkgs;
[
inputs'.llm-agents.packages.pi
coreutils
inotify-tools
nushell
util-linux
]
++ userPackages;
# Shared unit environment; these NOTABILITY_* values mirror the fallback
# defaults in the nushell lib so both resolve the same directories.
commonEnvironment = {
HOME = homeDir;
NOTABILITY_ARCHIVE_ROOT = "${dataRoot}/archive";
NOTABILITY_DATA_ROOT = dataRoot;
NOTABILITY_DB_PATH = "${stateRoot}/db.sqlite";
NOTABILITY_NOTES_DIR = notesRoot;
NOTABILITY_RENDER_ROOT = "${dataRoot}/rendered-pages";
NOTABILITY_SESSIONS_ROOT = "${stateRoot}/sessions";
NOTABILITY_STATE_ROOT = stateRoot;
NOTABILITY_TRANSCRIPT_ROOT = "${stateRoot}/transcripts";
NOTABILITY_WEBDAV_ROOT = webdavRoot;
XDG_CONFIG_HOME = "${homeDir}/.config";
};
# tmpfiles rule: create the directory, owned by the ingest user.
mkTmpDirRule = path: "d ${path} 0755 cschmatzler users -";
# Template for the notability systemd services: same user, PATH, shared env,
# always-restart policy; each caller supplies the nushell script to run.
mkNotabilityService = {
description,
script,
after ? [],
requires ? [],
environment ? {},
}: {
inherit after description requires;
wantedBy = ["multi-user.target"];
path = commonPath;
environment = commonEnvironment // environment;
serviceConfig = {
ExecStart = "${pkgs.nushell}/bin/nu ${notabilityScripts}/${script}";
Group = "users";
Restart = "always";
RestartSec = 5;
User = "cschmatzler";
WorkingDirectory = homeDir;
};
};
in {
# WebDAV password, delivered by sops as a raw file readable by the user.
sops.secrets.tahani-notability-webdav-password = {
sopsFile = ../../../../secrets/tahani-notability-webdav-password;
format = "binary";
owner = "cschmatzler";
path = "/run/secrets/tahani-notability-webdav-password";
};
home-manager.users.cschmatzler = {
home.packages = userPackages;
# qmd search index over the generated notes tree.
home.file.".config/qmd/index.yml".text = ''
collections:
notes:
path: ${notesRoot}
pattern: "**/*.md"
'';
};
# Pre-create every directory the ingest scripts expect (see ensure-layout).
systemd.tmpfiles.rules =
builtins.map mkTmpDirRule [
notesRoot
dataRoot
webdavRoot
"${dataRoot}/archive"
"${dataRoot}/rendered-pages"
stateRoot
"${stateRoot}/jobs"
"${stateRoot}/jobs/queued"
"${stateRoot}/jobs/running"
"${stateRoot}/jobs/failed"
"${stateRoot}/jobs/done"
"${stateRoot}/jobs/results"
"${stateRoot}/sessions"
"${stateRoot}/transcripts"
];
# Expose only the /notability WebDAV endpoint on the tailnet; the port must
# match NOTABILITY_WEBDAV_ADDR below.
services.caddy.virtualHosts."tahani.manticore-hippocampus.ts.net".extraConfig = ''
tls {
get_certificate tailscale
}
handle /notability* {
reverse_proxy 127.0.0.1:9980
}
'';
systemd.services.notability-webdav =
mkNotabilityService {
description = "Notability WebDAV landing zone";
script = "webdav.nu";
after = ["network.target"];
environment = {
NOTABILITY_WEBDAV_ADDR = "127.0.0.1:9980";
NOTABILITY_WEBDAV_BASEURL = "/notability";
NOTABILITY_WEBDAV_PASSWORD_FILE = config.sops.secrets.tahani-notability-webdav-password.path;
NOTABILITY_WEBDAV_USER = "notability";
};
};
# The watcher hard-depends on the WebDAV unit that feeds it.
systemd.services.notability-watch =
mkNotabilityService {
description = "Watch and ingest Notability WebDAV uploads";
script = "watch.nu";
after = ["notability-webdav.service"];
requires = ["notability-webdav.service"];
};
}

View File

@@ -1,141 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
# True when a non-terminal (not done/failed) job already exists for this
# (note, source hash) pair — used to suppress duplicate enqueues.
def active-job-exists [note_id: string, source_hash: string] {
  let matching = (sql-json $"
    select job_id
    from jobs
    where note_id = (sql-quote $note_id)
      and source_hash = (sql-quote $source_hash)
      and status != 'done'
      and status != 'failed'
    limit 1;
  ")
  ($matching | length) > 0
}
# Copy the source file into the per-note archive and record a `versions` row
# (ingest_result starts as 'pending', session_path as null). Returns the new
# version id, timestamp, and archive path for the caller to store on the note.
export def archive-and-version [note_id: string, source_path: path, source_relpath: string, source_size: any, source_mtime: string, source_hash: string] {
let source_size_int = ($source_size | into int)
# archive-path-for also creates the destination directory.
let archive_path = (archive-path-for $note_id $source_hash $source_relpath)
cp $source_path $archive_path
let version_id = (new-version-id)
let seen_at = (now-iso)
# Every text value is escaped via sql-quote before being spliced into SQL.
let version_id_q = (sql-quote $version_id)
let note_id_q = (sql-quote $note_id)
let seen_at_q = (sql-quote $seen_at)
let archive_path_q = (sql-quote $archive_path)
let source_hash_q = (sql-quote $source_hash)
let source_mtime_q = (sql-quote $source_mtime)
let source_relpath_q = (sql-quote $source_relpath)
let sql = ([
"insert into versions (version_id, note_id, seen_at, archive_path, source_hash, source_size, source_mtime, source_relpath, ingest_result, session_path) values ("
$version_id_q
", "
$note_id_q
", "
$seen_at_q
", "
$archive_path_q
", "
$source_hash_q
", "
# Size is a bare integer literal, not a quoted string.
($source_size_int | into string)
", "
$source_mtime_q
", "
$source_relpath_q
", 'pending', null);"
] | str join '')
sql-run $sql | ignore
{
version_id: $version_id
seen_at: $seen_at
archive_path: $archive_path
}
}
# Queue an ingest job for a note unless an identical (note, hash) job is
# already pending or running. Writes a JSON manifest into the queued
# directory, inserts a `jobs` row, and returns the job's paths — or null when
# the enqueue was suppressed as a duplicate.
export def enqueue-job [
note: record,
operation: string,
input_path: string,
archive_path: string,
source_hash: string,
title: string,
force_overwrite_generated: bool = false,
source_transport: string = 'webdav',
] {
# Duplicate suppression: callers treat a null return as "already queued".
if (active-job-exists $note.note_id $source_hash) {
return null
}
let job_id = (new-job-id)
let requested_at = (now-iso)
let manifest_path = (manifest-path-for $job_id 'queued')
let result_path = (result-path-for $job_id)
let transcript_path = (transcript-path-for $note.note_id $job_id)
let session_dir = ([(sessions-root) $note.note_id $job_id] | path join)
mkdir $session_dir
# The manifest is the worker's full job description; version field allows
# the format to evolve.
let manifest = {
version: 1
job_id: $job_id
note_id: $note.note_id
operation: $operation
requested_at: $requested_at
title: $title
source_relpath: $note.source_relpath
source_path: $note.source_path
input_path: $input_path
archive_path: $archive_path
output_path: $note.output_path
transcript_path: $transcript_path
result_path: $result_path
session_dir: $session_dir
source_hash: $source_hash
last_generated_output_hash: ($note.last_generated_output_hash? | default null)
force_overwrite_generated: $force_overwrite_generated
source_transport: $source_transport
}
($manifest | to json --indent 2) | save -f $manifest_path
let job_id_q = (sql-quote $job_id)
let note_id_q = (sql-quote $note.note_id)
let operation_q = (sql-quote $operation)
let requested_at_q = (sql-quote $requested_at)
let source_hash_q = (sql-quote $source_hash)
let manifest_path_q = (sql-quote $manifest_path)
let result_path_q = (sql-quote $result_path)
let sql = ([
"insert into jobs (job_id, note_id, operation, status, requested_at, source_hash, job_manifest_path, result_path) values ("
$job_id_q
", "
$note_id_q
", "
$operation_q
", 'queued', "
$requested_at_q
", "
$source_hash_q
", "
$manifest_path_q
", "
$result_path_q
");"
] | str join '')
sql-run $sql | ignore
{
job_id: $job_id
requested_at: $requested_at
manifest_path: $manifest_path
result_path: $result_path
transcript_path: $transcript_path
session_dir: $session_dir
}
}

View File

@@ -1,433 +0,0 @@
# The current user's home directory as reported by the nushell engine.
export def home-dir [] {
$nu.home-dir
}
# Data root: env override wins, else the default share dir under $HOME.
export def data-root [] {
  $env.NOTABILITY_DATA_ROOT? | default ([$nu.home-dir ".local" "share" "notability-ingest"] | path join)
}
# State root: env override wins, else the default state dir under $HOME.
export def state-root [] {
  $env.NOTABILITY_STATE_ROOT? | default ([$nu.home-dir ".local" "state" "notability-ingest"] | path join)
}
# Notes output directory: env override wins, else ~/Notes.
export def notes-root [] {
  $env.NOTABILITY_NOTES_DIR? | default ([$nu.home-dir "Notes"] | path join)
}
# WebDAV landing zone: env override wins, else a subdir of the data root.
export def webdav-root [] {
  $env.NOTABILITY_WEBDAV_ROOT? | default ([(data-root) "webdav-root"] | path join)
}
# Archive directory: env override wins, else a subdir of the data root.
export def archive-root [] {
  $env.NOTABILITY_ARCHIVE_ROOT? | default ([(data-root) "archive"] | path join)
}
# Rendered-pages directory: env override wins, else a subdir of the data root.
export def render-root [] {
  $env.NOTABILITY_RENDER_ROOT? | default ([(data-root) "rendered-pages"] | path join)
}
# Transcript directory: env override wins, else a subdir of the state root.
export def transcript-root [] {
  $env.NOTABILITY_TRANSCRIPT_ROOT? | default ([(state-root) "transcripts"] | path join)
}
# Job-queue directory: env override wins, else a subdir of the state root.
export def jobs-root [] {
  $env.NOTABILITY_JOBS_ROOT? | default ([(state-root) "jobs"] | path join)
}
# Directory holding manifests of jobs waiting to run.
export def queued-root [] {
  (jobs-root) | path join "queued"
}
# Directory holding manifests of jobs currently running.
export def running-root [] {
  (jobs-root) | path join "running"
}
# Directory holding manifests of jobs that failed.
export def failed-root [] {
  (jobs-root) | path join "failed"
}
# Directory holding manifests of completed jobs.
export def done-root [] {
  (jobs-root) | path join "done"
}
# Directory holding per-job result JSON files.
export def results-root [] {
  (jobs-root) | path join "results"
}
# Worker-session directory: env override wins, else a subdir of the state root.
export def sessions-root [] {
  $env.NOTABILITY_SESSIONS_ROOT? | default ([(state-root) "sessions"] | path join)
}
# Marker file signalling that the qmd index needs a refresh.
export def qmd-dirty-file [] {
  (state-root) | path join "qmd-dirty"
}
# SQLite database path: env override wins, else db.sqlite under the state root.
export def db-path [] {
  $env.NOTABILITY_DB_PATH? | default ([(state-root) "db.sqlite"] | path join)
}
# Current time formatted as a second-resolution ISO-8601 string.
# NOTE(review): `date now` is local time but the format appends a literal 'Z';
# this assumes the host runs in UTC — confirm against the deployment.
export def now-iso [] {
date now | format date "%Y-%m-%dT%H:%M:%SZ"
}
# Render a value as a SQL literal: null becomes the NULL keyword, everything
# else is stringified and single-quoted with embedded quotes doubled.
export def sql-quote [value?: any] {
  if $value == null {
    return "NULL"
  }
  let escaped = ($value | into string | str replace -a "'" "''")
  $"'($escaped)'"
}
# Execute a SQL statement against the ingest database via the sqlite3 CLI and
# return raw stdout. `.timeout 5000` makes sqlite wait up to 5s on a locked
# database instead of failing immediately (several services share the file).
# Raises when sqlite3 exits non-zero, surfacing its trimmed stderr.
export def sql-run [sql: string] {
let database = (db-path)
let result = (^sqlite3 -cmd '.timeout 5000' $database $sql | complete)
if $result.exit_code != 0 {
error make {
msg: $"sqlite3 failed: ($result.stderr | str trim)"
}
}
$result.stdout
}
# Run a query with sqlite3's -json output mode and parse the result into a
# table. An empty result set produces empty stdout, which is mapped to [].
# Raises when sqlite3 exits non-zero.
export def sql-json [sql: string] {
let database = (db-path)
let result = (^sqlite3 -cmd '.timeout 5000' -json $database $sql | complete)
if $result.exit_code != 0 {
error make {
msg: $"sqlite3 failed: ($result.stderr | str trim)"
}
}
let text = ($result.stdout | str trim)
if $text == "" {
[]
} else {
$text | from json
}
}
# Idempotent setup: create every directory the pipeline uses and apply the
# database schema (all DDL is `if not exists`, so repeated runs are safe).
export def ensure-layout [] {
mkdir (data-root)
mkdir (state-root)
mkdir (notes-root)
mkdir (webdav-root)
mkdir (archive-root)
mkdir (render-root)
mkdir (transcript-root)
mkdir (jobs-root)
mkdir (queued-root)
mkdir (running-root)
mkdir (failed-root)
mkdir (done-root)
mkdir (results-root)
mkdir (sessions-root)
# Schema: notes (current state), versions (archived snapshots), jobs (queue
# bookkeeping), events (append-only audit log), plus hot-path indexes.
sql-run '
create table if not exists notes (
note_id text primary key,
source_relpath text not null unique,
title text not null,
output_path text not null,
status text not null,
first_seen_at text not null,
last_seen_at text not null,
last_processed_at text,
missing_since text,
deleted_at text,
current_source_hash text,
current_source_size integer,
current_source_mtime text,
current_archive_path text,
latest_version_id text,
last_generated_source_hash text,
last_generated_output_hash text,
conflict_path text,
last_error text
);
create table if not exists versions (
version_id text primary key,
note_id text not null,
seen_at text not null,
archive_path text not null unique,
source_hash text not null,
source_size integer not null,
source_mtime text not null,
source_relpath text not null,
ingest_result text,
session_path text,
foreign key (note_id) references notes (note_id)
);
create table if not exists jobs (
job_id text primary key,
note_id text not null,
operation text not null,
status text not null,
requested_at text not null,
started_at text,
finished_at text,
source_hash text,
job_manifest_path text not null,
result_path text not null,
error_summary text,
foreign key (note_id) references notes (note_id)
);
create table if not exists events (
id integer primary key autoincrement,
note_id text not null,
ts text not null,
kind text not null,
details text,
foreign key (note_id) references notes (note_id)
);
create index if not exists idx_jobs_status_requested_at on jobs(status, requested_at);
create index if not exists idx_versions_note_id_seen_at on versions(note_id, seen_at);
create index if not exists idx_events_note_id_ts on events(note_id, ts);
'
| ignore
}
# Append a row to the events audit log. Structured `details` are serialised
# to JSON so sqlite stores a single text column; null details stay NULL.
export def log-event [note_id: string, kind: string, details?: any] {
  let payload = if $details == null { null } else { $details | to json }
  let quoted_values = (
    [
      (sql-quote $note_id)
      (sql-quote (now-iso))
      (sql-quote $kind)
      (sql-quote $payload)
    ]
    | str join ', '
  )
  let sql = ([
    "insert into events (note_id, ts, kind, details) values ("
    $quoted_values
    ");"
  ] | str join '')
  sql-run $sql | ignore
}
# Lower-case the value, collapse every non-alphanumeric run to a single '-',
# strip leading/trailing dashes, and fall back to 'note' when nothing remains.
export def slugify [value: string] {
  let normalized = ($value | str downcase | str replace -r '[^a-z0-9]+' '-')
  let trimmed = ($normalized | str trim -c '-')
  if ($trimmed | is-empty) {
    'note'
  } else {
    $trimmed
  }
}
# Hex SHA-256 of a file via the coreutils CLI. sha256sum prints
# "<hash>  <path>"; taking the first space-separated field yields the hash.
export def sha256 [file: path] {
(^sha256sum $file | lines | first | split row ' ' | first)
}
# Parse a markdown file's leading frontmatter ("---\n ... \n---\n") into a
# flat record. Only top-level `key: value` lines are read — nesting is not
# supported. Returns {} when the file is missing, does not start with the
# delimiter, or has no closing delimiter.
export def parse-output-frontmatter [file: path] {
if not ($file | path exists) {
{}
} else {
let content = (open --raw $file)
if not ($content | str starts-with "---\n") {
{}
} else {
# Drop the opening "---\n" (4 chars) and locate the closing delimiter.
let rest = ($content | str substring 4..)
let end = ($rest | str index-of "\n---\n")
if $end == null {
{}
} else {
# Ranges are inclusive, hence ($end - 1) to exclude the newline itself.
let block = ($rest | str substring 0..($end - 1))
$block
| lines
| where ($it | str contains ':')
| reduce --fold {} {|line, acc|
let idx = ($line | str index-of ':')
if $idx == null {
$acc
} else {
# Split on the FIRST ':' only; both halves are trimmed.
let key = ($line | str substring 0..($idx - 1) | str trim)
let value = ($line | str substring ($idx + 1).. | str trim)
$acc | upsert $key $value
}
}
}
}
}
}
# Ask zk (dry-run) where a new note with this title would be created, without
# actually creating it. Blank titles fall back to 'Imported note'.
# NOTE(review): the generated path is read from zk's *stderr* (last non-empty
# line) even on success — this relies on zk's dry-run output behaviour;
# confirm it holds for the pinned zk version.
export def zk-generated-note-path [title: string] {
let root = (notes-root)
let effective_title = if ($title | str trim) == '' {
'Imported note'
} else {
$title
}
let result = (
^zk --notebook-dir $root --working-dir $root new $root --no-input --title $effective_title --print-path --dry-run
| complete
)
if $result.exit_code != 0 {
error make {
msg: $"zk failed to generate a note path: ($result.stderr | str trim)"
}
}
let path_text = ($result.stderr | str trim)
if $path_text == '' {
error make {
msg: 'zk did not return a generated note path'
}
}
$path_text
| lines
| last
| str trim
}
# Fresh note id: "ntl_" followed by a 32-hex-char UUID with hyphens removed.
export def new-note-id [] {
  $"ntl_(random uuid | str replace -a '-' '')"
}
# Fresh job id: "job_" followed by a 32-hex-char UUID with hyphens removed.
export def new-job-id [] {
  $"job_(random uuid | str replace -a '-' '')"
}
# Fresh version id: "ver_" followed by a 32-hex-char UUID with hyphens removed.
export def new-version-id [] {
  $"ver_(random uuid | str replace -a '-' '')"
}
# Build (and create the directory for) a timestamped archive path:
# <archive-root>/<note_id>/<stamp>-<short hash>.<ext>.
export def archive-path-for [note_id: string, source_hash: string, source_relpath: string] {
# Colons replaced with dashes so the stamp is filesystem-safe.
let stamp = (date now | format date "%Y-%m-%dT%H-%M-%SZ")
# Inclusive range 0..11 = the first 12 characters of the hash.
let short = ($source_hash | str substring 0..11)
let directory = [(archive-root) $note_id] | path join
let parsed = ($source_relpath | path parse)
# Fall back to .bin when the source name has no extension.
let extension = if (($parsed.extension? | default '') | str trim) == '' {
'bin'
} else {
($parsed.extension | str downcase)
}
mkdir $directory
[$directory $"($stamp)-($short).($extension)"] | path join
}
# Per-note transcript path for a job; creates the note's transcript directory.
export def transcript-path-for [note_id: string, job_id: string] {
  let directory = ((transcript-root) | path join $note_id)
  mkdir $directory
  $directory | path join $"($job_id).md"
}
# JSON result file for a job, under the results directory.
export def result-path-for [job_id: string] {
  (results-root) | path join $"($job_id).json"
}
# Manifest JSON path for a job; the directory encodes the job status.
# Unknown statuses deliberately fall back to the queued directory.
export def manifest-path-for [job_id: string, status: string] {
let root = match $status {
'queued' => (queued-root)
'running' => (running-root)
'failed' => (failed-root)
'done' => (done-root)
_ => (queued-root)
}
[$root $"($job_id).json"] | path join
}
# Resolve the output markdown path for a note title (delegates to zk).
export def note-output-path [title: string] {
zk-generated-note-path $title
}
# Only PDF and PNG uploads are ingestible; the check is case-insensitive.
export def is-supported-source-path [path: string] {
  let lowered = ($path | str downcase)
  ['.pdf' '.png'] | any {|extension| $lowered | str ends-with $extension }
}
# Skip hidden files, editor/upload temp files, and Syncthing-style
# sync-conflict copies. Matching is case-insensitive on the relative path.
export def is-ignored-path [relpath: string] {
  let lowered = ($relpath | str downcase)
  (
    ($lowered | str starts-with '.')
    or ($lowered | str contains '/.')
    or ($lowered | str contains '/~')
    or ($lowered | str ends-with '.tmp')
    or ($lowered | str ends-with '.part')
    or ($lowered | str contains '.sync-conflict')
  )
}
# Enumerate candidate uploads under the webdav root. Globs both lower- and
# upper-case PDF/PNG extensions, de-duplicates, drops ignored paths, and
# returns records with path, relpath, size, mtime (same format as now-iso)
# and a title taken from the file stem. Empty list when the root is missing.
export def scan-source-files [] {
let root = (webdav-root)
if not ($root | path exists) {
[]
} else {
let files = ([
(glob $"($root)/**/*.pdf")
(glob $"($root)/**/*.PDF")
(glob $"($root)/**/*.png")
(glob $"($root)/**/*.PNG")
] | flatten)
$files
| sort
| uniq
| each {|file|
let relpath = ($file | path relative-to $root)
if ((is-ignored-path $relpath) or not (is-supported-source-path $file)) {
null
} else {
let stat = (ls -l $file | first)
{
source_path: $file
source_relpath: $relpath
source_size: $stat.size
source_mtime: ($stat.modified | format date "%Y-%m-%dT%H:%M:%SZ")
title: (($relpath | path parse).stem)
}
}
}
# Ignored/unsupported entries were mapped to null above; drop them here.
| where $it != null
}
}

View File

@@ -1,387 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
use ./jobs.nu [archive-and-version, enqueue-job]
const settle_window = 45sec
const delete_grace = 15min
# How much longer to wait before a file counts as settled (0sec when the
# settle window since last modification has already elapsed).
def settle-remaining [source_mtime: string] {
  let age = ((date now) - ($source_mtime | into datetime))
  if $age >= $settle_window {
    0sec
  } else {
    $settle_window - $age
  }
}
# A file is settled once it has not been modified for the full settle window.
def is-settled [source_mtime: string] {
  let age = ((date now) - ($source_mtime | into datetime))
  $age >= $settle_window
}
# Convenience wrapper: record a 'job-enqueued' event with a consistent payload.
def log-job-enqueued [note_id: string, job_id: string, operation: string, source_hash: string, archive_path: string] {
log-event $note_id 'job-enqueued' {
job_id: $job_id
operation: $operation
source_hash: $source_hash
archive_path: $archive_path
}
}
# Notes that previously carried this content hash but are not currently
# active/failed/conflicted — used to detect a rename or move of an existing
# note. Returns a (possibly empty) table; callers check is-empty before first.
def find-rename-candidate [source_hash: string] {
sql-json $"
select *
from notes
where current_source_hash = (sql-quote $source_hash)
and status != 'active'
and status != 'failed'
and status != 'conflict'
order by last_seen_at desc
limit 1;
"
}
# Refresh a note's last-seen bookkeeping (size, mtime, status) without
# touching its hash or archive. `status` defaults to 'active'; callers pass
# the current status through while a file is still settling.
def touch-note [note_id: string, source_size: any, source_mtime: string, status: string = 'active'] {
let source_size_int = ($source_size | into int)
let now_q = (sql-quote (now-iso))
let source_mtime_q = (sql-quote $source_mtime)
let status_q = (sql-quote $status)
let note_id_q = (sql-quote $note_id)
sql-run $"
update notes
set last_seen_at = ($now_q),
current_source_size = ($source_size_int),
current_source_mtime = ($source_mtime_q),
status = ($status_q)
where note_id = ($note_id_q);
"
| ignore
}
# Reconcile an already-tracked note against the file currently on disk.
# Outcomes: touch-only (file not settled), cheap metadata refresh (size and
# mtime unchanged and nothing pending), re-enqueue (hash identical but the
# last ingest failed or never produced output), or a full archive + upsert
# job when the content hash actually changed.
def process-existing [note: record, source: record] {
let title = $source.title
let note_id = ($note | get note_id)
let note_status = ($note | get status)
let source_size_int = ($source.source_size | into int)
# Modified too recently: record that we saw it, keep the current status.
if not (is-settled $source.source_mtime) {
touch-note $note_id $source_size_int $source.source_mtime $note_status
return
}
let previous_size = ($note.current_source_size? | default (-1))
let previous_mtime = ($note.current_source_mtime? | default '')
let size_changed = ($previous_size != $source_size_int)
let mtime_changed = ($previous_mtime != $source.source_mtime)
# Output is stale whenever the last ingested hash differs from the current one.
let needs_ingest = (($note.last_generated_source_hash? | default '') != ($note.current_source_hash? | default ''))
let hash_needed = ($note.current_source_hash? | default null) == null or $size_changed or $mtime_changed or ($note_status != 'active') or $needs_ingest
# Fast path: skip the sha256 entirely and just refresh the metadata.
if not $hash_needed {
let now_q = (sql-quote (now-iso))
let title_q = (sql-quote $title)
let note_id_q = (sql-quote $note_id)
sql-run $"
update notes
set last_seen_at = ($now_q),
status = 'active',
title = ($title_q),
missing_since = null,
deleted_at = null
where note_id = ($note_id_q);
"
| ignore
return
}
let source_hash = (sha256 $source.source_path)
# Content unchanged: refresh metadata, then only re-enqueue when the previous
# ingest failed or never completed.
if ($source_hash == ($note.current_source_hash? | default '')) {
let now_q = (sql-quote (now-iso))
let title_q = (sql-quote $title)
let source_mtime_q = (sql-quote $source.source_mtime)
let note_id_q = (sql-quote $note_id)
# A failed note stays 'failed' until a job actually succeeds.
let next_status = if $note_status == 'failed' { 'failed' } else { 'active' }
sql-run $"
update notes
set last_seen_at = ($now_q),
title = ($title_q),
status = (sql-quote $next_status),
missing_since = null,
deleted_at = null,
current_source_size = ($source_size_int),
current_source_mtime = ($source_mtime_q)
where note_id = ($note_id_q);
"
| ignore
let should_enqueue = ($note_status == 'failed' or (($note.last_generated_source_hash? | default '') != $source_hash))
if not $should_enqueue {
return
}
# Re-archive only when no archive copy exists yet for this content.
let archive_path = if (($note.current_archive_path? | default '') | str trim) == '' {
let version = (archive-and-version $note_id $source.source_path $source.source_relpath $source_size_int $source.source_mtime $source_hash)
let archive_path_q = (sql-quote $version.archive_path)
let version_id_q = (sql-quote $version.version_id)
sql-run $"
update notes
set current_archive_path = ($archive_path_q),
latest_version_id = ($version_id_q)
where note_id = ($note_id_q);
"
| ignore
$version.archive_path
} else {
$note.current_archive_path
}
let runtime_note = ($note | upsert source_path $source.source_path | upsert source_relpath $source.source_relpath | upsert output_path $note.output_path | upsert last_generated_output_hash ($note.last_generated_output_hash? | default null))
let retry_job = (enqueue-job $runtime_note 'upsert' $archive_path $archive_path $source_hash $title)
if $retry_job != null {
log-job-enqueued $note_id $retry_job.job_id 'upsert' $source_hash $archive_path
let reason = if $note_status == 'failed' {
'retry-failed-note'
} else {
'missing-generated-output'
}
log-event $note_id 'job-reenqueued' {
job_id: $retry_job.job_id
reason: $reason
source_hash: $source_hash
archive_path: $archive_path
}
}
return
}
# Content changed: archive the new bytes, update the note row, enqueue upsert.
let version = (archive-and-version $note_id $source.source_path $source.source_relpath $source_size_int $source.source_mtime $source_hash)
let now_q = (sql-quote (now-iso))
let title_q = (sql-quote $title)
let source_hash_q = (sql-quote $source_hash)
let source_mtime_q = (sql-quote $source.source_mtime)
let archive_path_q = (sql-quote $version.archive_path)
let version_id_q = (sql-quote $version.version_id)
let note_id_q = (sql-quote $note_id)
sql-run $"
update notes
set last_seen_at = ($now_q),
title = ($title_q),
status = 'active',
missing_since = null,
deleted_at = null,
current_source_hash = ($source_hash_q),
current_source_size = ($source_size_int),
current_source_mtime = ($source_mtime_q),
current_archive_path = ($archive_path_q),
latest_version_id = ($version_id_q),
last_error = null
where note_id = ($note_id_q);
"
| ignore
let runtime_note = ($note | upsert source_path $source.source_path | upsert source_relpath $source.source_relpath | upsert output_path $note.output_path | upsert last_generated_output_hash ($note.last_generated_output_hash? | default null))
let job = (enqueue-job $runtime_note 'upsert' $version.archive_path $version.archive_path $source_hash $title)
if $job != null {
log-job-enqueued $note_id $job.job_id 'upsert' $source_hash $version.archive_path
}
log-event $note_id 'source-updated' {
source_relpath: $source.source_relpath
source_hash: $source_hash
archive_path: $version.archive_path
}
}
# Register a source file whose relpath is not yet in the notes table.
# First tries rename detection (an inactive note already carrying this hash);
# otherwise creates a brand-new note, archives the file and enqueues ingest.
def process-new [source: record] {
# Never touch a file that is still being uploaded/modified.
if not (is-settled $source.source_mtime) {
return
}
let source_hash = (sha256 $source.source_path)
let source_size_int = ($source.source_size | into int)
let rename_candidates = (find-rename-candidate $source_hash)
# Same content under a new relpath: treat it as a rename, no re-ingest.
if not ($rename_candidates | is-empty) {
let rename_candidate = ($rename_candidates | first)
let source_relpath_q = (sql-quote $source.source_relpath)
let title_q = (sql-quote $source.title)
let now_q = (sql-quote (now-iso))
let source_mtime_q = (sql-quote $source.source_mtime)
let note_id_q = (sql-quote $rename_candidate.note_id)
sql-run $"
update notes
set source_relpath = ($source_relpath_q),
title = ($title_q),
last_seen_at = ($now_q),
status = 'active',
missing_since = null,
deleted_at = null,
current_source_size = ($source_size_int),
current_source_mtime = ($source_mtime_q)
where note_id = ($note_id_q);
"
| ignore
log-event $rename_candidate.note_id 'source-renamed' {
from: $rename_candidate.source_relpath
to: $source.source_relpath
}
return
}
# Genuinely new note: pick an output path via zk, archive, insert, enqueue.
let note_id = (new-note-id)
let first_seen_at = (now-iso)
let output_path = (note-output-path $source.title)
let version = (archive-and-version $note_id $source.source_path $source.source_relpath $source_size_int $source.source_mtime $source_hash)
let note_id_q = (sql-quote $note_id)
let source_relpath_q = (sql-quote $source.source_relpath)
let title_q = (sql-quote $source.title)
let output_path_q = (sql-quote $output_path)
let first_seen_q = (sql-quote $first_seen_at)
let source_hash_q = (sql-quote $source_hash)
let source_mtime_q = (sql-quote $source.source_mtime)
let archive_path_q = (sql-quote $version.archive_path)
let version_id_q = (sql-quote $version.version_id)
let sql = ([
"insert into notes (note_id, source_relpath, title, output_path, status, first_seen_at, last_seen_at, current_source_hash, current_source_size, current_source_mtime, current_archive_path, latest_version_id) values ("
$note_id_q
", "
$source_relpath_q
", "
$title_q
", "
$output_path_q
", 'active', "
$first_seen_q
", "
# first_seen_at doubles as the initial last_seen_at.
$first_seen_q
", "
$source_hash_q
", "
($source_size_int | into string)
", "
$source_mtime_q
", "
$archive_path_q
", "
$version_id_q
");"
] | str join '')
sql-run $sql | ignore
let note = {
note_id: $note_id
source_relpath: $source.source_relpath
source_path: $source.source_path
output_path: $output_path
last_generated_output_hash: null
}
let job = (enqueue-job $note 'upsert' $version.archive_path $version.archive_path $source_hash $source.title)
if $job != null {
log-job-enqueued $note_id $job.job_id 'upsert' $source_hash $version.archive_path
}
log-event $note_id 'source-discovered' {
source_relpath: $source.source_relpath
source_hash: $source_hash
archive_path: $version.archive_path
output_path: $output_path
}
}
# Two-phase deletion tracking for notes whose source file disappeared:
# active -> 'source_missing' immediately, then 'source_deleted' once the file
# has been gone longer than $delete_grace. Notes present in seen_relpaths are
# left untouched.
def mark-missing [seen_relpaths: list<string>] {
let notes = (sql-json 'select note_id, source_relpath, status, missing_since from notes;')
for note in $notes {
if ($seen_relpaths | any {|rel| $rel == $note.source_relpath }) {
continue
}
if $note.status == 'active' {
let missing_since = (now-iso)
let missing_since_q = (sql-quote $missing_since)
let note_id_q = (sql-quote $note.note_id)
sql-run $"
update notes
set status = 'source_missing',
missing_since = ($missing_since_q)
where note_id = ($note_id_q);
"
| ignore
log-event $note.note_id 'source-missing' {
source_relpath: $note.source_relpath
}
continue
}
# Escalate to deleted only after the grace period has fully elapsed.
if $note.status == 'source_missing' and ($note.missing_since? | default null) != null {
let missing_since = ($note.missing_since | into datetime)
if ((date now) - $missing_since) >= $delete_grace {
let deleted_at = (now-iso)
let deleted_at_q = (sql-quote $deleted_at)
let note_id_q = (sql-quote $note.note_id)
sql-run $"
update notes
set status = 'source_deleted',
deleted_at = ($deleted_at_q)
where note_id = ($note_id_q);
"
| ignore
log-event $note.note_id 'source-deleted' {
source_relpath: $note.source_relpath
}
}
}
}
}
# One full reconcile pass: scan the WebDAV tree, wait out any files that are
# still settling (then rescan), route each file to process-new or
# process-existing, and finally sweep for notes whose source vanished.
export def reconcile-run [] {
ensure-layout
mut sources = (scan-source-files)
let unsettled = (
$sources
| each {|source|
{
source_path: $source.source_path
remaining: (settle-remaining $source.source_mtime)
}
}
| where remaining > 0sec
)
if not ($unsettled | is-empty) {
# Sleep past the slowest file (+2s slack), then take a fresh snapshot.
let max_remaining = ($unsettled | get remaining | math max)
print $"Waiting ($max_remaining) for recent Notability uploads to settle"
sleep ($max_remaining + 2sec)
$sources = (scan-source-files)
}
for source in $sources {
let existing_rows = (sql-json $"
select *
from notes
where source_relpath = (sql-quote $source.source_relpath)
limit 1;
")
if (($existing_rows | length) == 0) {
process-new $source
} else {
let existing = ($existing_rows | first)
process-existing ($existing | upsert source_path $source.source_path) $source
}
}
mark-missing ($sources | get source_relpath)
}
# Entry point: run a single reconcile pass (the systemd unit restarts us).
def main [] {
reconcile-run
}

View File

@@ -1,148 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
use ./jobs.nu [archive-and-version, enqueue-job]
use ./worker.nu [worker-run]
# Most recent archived version for a note, or null when none exists.
# Uses `get 0?` instead of `first`: `first` raises on an empty result set,
# which would abort before the caller's `$version == null` check ever runs.
def latest-version [note_id: string] {
  sql-json $"
select *
from versions
where note_id = (sql-quote $note_id)
order by seen_at desc
limit 1;
"
  | get 0?
}
# The newest non-terminal (not done/failed) job for this (note, hash) pair,
# or null when there is none. `get 0?` is used instead of `first` because
# `first` raises on an empty list, whereas callers expect a null result
# (they read `$existing.job_id? | default 'unknown'`).
def existing-active-job [note_id: string, source_hash: string] {
  sql-json $"
select job_id
from jobs
where note_id = (sql-quote $note_id)
and source_hash = (sql-quote $source_hash)
and status != 'done'
and status != 'failed'
order by requested_at desc
limit 1;
"
  | get 0?
}
# Snapshot the live WebDAV source into the archive, update the note row to
# point at the fresh version, and return the hash/paths the re-ingest job
# needs. Raises when the source file has disappeared.
def archive-current-source [note: record] {
  if not ($note.source_path | path exists) {
    error make {
      msg: $"Current source path is missing: ($note.source_path)"
    }
  }
  let source_hash = (sha256 $note.source_path)
  # Stat the file once. The previous code ran `ls -l` twice (once for size,
  # once for mtime), which could observe two different states if the file
  # changed between the calls.
  let stat = (ls -l $note.source_path | first)
  let source_size = ($stat.size | into int)
  let source_mtime = ($stat.modified | format date "%Y-%m-%dT%H:%M:%SZ")
  let version = (archive-and-version $note.note_id $note.source_path $note.source_relpath $source_size $source_mtime $source_hash)
  sql-run $"
update notes
set current_source_hash = (sql-quote $source_hash),
current_source_size = ($source_size),
current_source_mtime = (sql-quote $source_mtime),
current_archive_path = (sql-quote $version.archive_path),
latest_version_id = (sql-quote $version.version_id),
last_seen_at = (sql-quote (now-iso)),
status = 'active',
missing_since = null,
deleted_at = null
where note_id = (sql-quote $note.note_id);
"
  | ignore
  {
    input_path: $version.archive_path
    archive_path: $version.archive_path
    source_hash: $source_hash
  }
}
# Enqueue a 'reingest' job for the note and immediately drain the worker
# queue. A null return from enqueue-job means an identical job is already
# pending; in that case we only print its id and stop.
def enqueue-reingest-job [note: record, source_hash: string, input_path: string, archive_path: string, force_overwrite_generated: bool] {
let job = (enqueue-job $note 'reingest' $input_path $archive_path $source_hash $note.title $force_overwrite_generated)
if $job == null {
let existing = (existing-active-job $note.note_id $source_hash)
print $"Already queued: ($existing.job_id? | default 'unknown')"
return
}
log-event $note.note_id 'reingest-enqueued' {
job_id: $job.job_id
source_hash: $source_hash
archive_path: $archive_path
force_overwrite_generated: $force_overwrite_generated
}
print $"Enqueued ($job.job_id) for ($note.note_id)"
# Run the worker inline so the CLI invocation finishes the ingest; rewrap
# any failure so the caller sees a plain string message.
try {
worker-run --drain
} catch {|error|
error make {
msg: (($error.msg? | default ($error | to nuon)) | into string)
}
}
}
# Re-run ingestion for an existing note.
#   --latest-source:  re-archive the live WebDAV file and ingest that.
#   --latest-archive: ingest the most recent archived version.
# Default: use the live source when the note is active and the file exists,
# otherwise fall back to the latest archive.
def main [note_id: string, --latest-source, --latest-archive, --force-overwrite-generated] {
  ensure-layout
  # `get 0?` yields null on an empty result set (unlike `first`, which raises
  # on empty input), so the unknown-id check below can actually fire.
  let note_row = (sql-json $"
select *
from notes
where note_id = (sql-quote $note_id)
limit 1;
" | get 0?)
  let note = if $note_row == null {
    null
  } else {
    # Derive the live source path from the stored relpath.
    $note_row | upsert source_path ([ (webdav-root) $note_row.source_relpath ] | path join)
  }
  if $note == null {
    error make {
      msg: $"Unknown note id: ($note_id)"
    }
  }
  if $latest_source and $latest_archive {
    error make {
      msg: 'Choose only one of --latest-source or --latest-archive'
    }
  }
  let source_mode = if $latest_source {
    'source'
  } else if $latest_archive {
    'archive'
  } else if ($note.status == 'active' and ($note.source_path | path exists)) {
    'source'
  } else {
    'archive'
  }
  if $source_mode == 'source' {
    let archived = (archive-current-source $note)
    enqueue-reingest-job $note $archived.source_hash $archived.input_path $archived.archive_path $force_overwrite_generated
    return
  }
  let version = (latest-version $note.note_id)
  if $version == null {
    error make {
      msg: $"No archived version found for ($note.note_id)"
    }
  }
  enqueue-reingest-job $note $version.source_hash $version.archive_path $version.archive_path $force_overwrite_generated
}

View File

@@ -1,202 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
# Render the top-level status report: note counts per status, the DB/root
# paths, and active job-queue counts, as one printable string.
def format-summary [] {
  let counts = (sql-json '
    select status, count(*) as count
    from notes
    group by status
    order by status;
  ')
  let queue = (sql-json "
    select status, count(*) as count
    from jobs
    where status in ('queued', 'running', 'failed')
    group by status
    order by status;
  ")
  # Count for one status, defaulting to 0 when absent. `first` errors on an
  # empty list, so guard with is-empty rather than `first | default`.
  let count_for = {|rows, status|
    let hits = ($rows | where {|row| ($row | get 'status') == $status })
    if ($hits | is-empty) { 0 } else { ($hits | first).count? | default 0 }
  }
  let lines = [
    $"notes db: (db-path)"
    $"webdav root: (webdav-root)"
    $"notes root: (notes-root)"
    ''
    'notes:'
  ]
  let note_statuses = ('active,source_missing,source_deleted,conflict,failed' | split row ',')
  let note_lines = (
    $note_statuses
    | each {|status|
      $"  ($status): (do $count_for $counts $status)"
    }
  )
  let job_statuses = ('queued,running,failed' | split row ',')
  let job_lines = (
    $job_statuses
    | each {|status|
      $"  ($status): (do $count_for $queue $status)"
    }
  )
  ($lines ++ $note_lines ++ ['' 'jobs:'] ++ $job_lines ++ ['']) | str join "\n"
}
# Render a detailed report for one note: its DB row, output-file frontmatter,
# the 5 most recent jobs, and the 10 most recent events.
def format-note [note_id: string] {
  let rows = (sql-json $"
    select *
    from notes
    where note_id = (sql-quote $note_id)
    limit 1;
  ")
  # `first` errors on empty input, so the original `| first` + null-check
  # pattern could never produce this error message. Guard with is-empty.
  if ($rows | is-empty) {
    error make {
      msg: $"Unknown note id: ($note_id)"
    }
  }
  let note = ($rows | first)
  let jobs = (sql-json $"
    select job_id, operation, status, requested_at, started_at, finished_at, source_hash, error_summary
    from jobs
    where note_id = (sql-quote $note_id)
    order by requested_at desc
    limit 5;
  ")
  let events = (sql-json $"
    select ts, kind, details
    from events
    where note_id = (sql-quote $note_id)
    order by ts desc
    limit 10;
  ")
  let output_exists = ($note.output_path | path exists)
  # Frontmatter from the generated file, used to cross-check ownership.
  let frontmatter = (parse-output-frontmatter $note.output_path)
  let lines = [
    $"note_id: ($note.note_id)"
    $"title: ($note.title)"
    $"status: ($note.status)"
    $"source_relpath: ($note.source_relpath)"
    $"output_path: ($note.output_path)"
    $"output_exists: ($output_exists)"
    $"managed_by: ($frontmatter.managed_by? | default '')"
    $"frontmatter_note_id: ($frontmatter.note_id? | default '')"
    $"current_source_hash: ($note.current_source_hash? | default '')"
    $"last_generated_output_hash: ($note.last_generated_output_hash? | default '')"
    $"current_archive_path: ($note.current_archive_path? | default '')"
    $"last_processed_at: ($note.last_processed_at? | default '')"
    $"missing_since: ($note.missing_since? | default '')"
    $"deleted_at: ($note.deleted_at? | default '')"
    $"conflict_path: ($note.conflict_path? | default '')"
    $"last_error: ($note.last_error? | default '')"
    ''
    'recent jobs:'
  ]
  let job_lines = if ($jobs | is-empty) {
    ['  (none)']
  } else {
    $jobs | each {|job|
      $"  ($job.job_id) [($job.status)] ($job.operation) requested=($job.requested_at) error=($job.error_summary? | default '')"
    }
  }
  let event_lines = if ($events | is-empty) {
    ['  (none)']
  } else {
    $events | each {|event|
      $"  ($event.ts) ($event.kind) ($event.details? | default '')"
    }
  }
  ($lines ++ $job_lines ++ ['' 'recent events:'] ++ $event_lines ++ ['']) | str join "\n"
}
# List all notes with the given status under a header line. Conflict and
# failed notes carry an extra diagnostic field.
def format-filtered [status: string, label: string] {
  let rows = (sql-json $"
    select note_id, title, source_relpath, output_path, status, last_error, conflict_path
    from notes
    where status = (sql-quote $status)
    order by last_seen_at desc;
  ")
  let entries = if ($rows | is-empty) {
    ['  (none)']
  } else {
    $rows | each {|row|
      let suffix = (match $status {
        'conflict' => $"  conflict_path=($row.conflict_path? | default '')"
        'failed' => $"  last_error=($row.last_error? | default '')"
        _ => ''
      })
      $"  ($row.note_id) ($row.title) [($row.status)] source=($row.source_relpath) output=($row.output_path)($suffix)"
    }
  }
  ([$label] ++ $entries ++ ['']) | str join "\n"
}
# Render all jobs that are still in flight (queued, running, or failed),
# oldest first.
def format-queue [] {
  let pending = (sql-json "
    select job_id, note_id, operation, status, requested_at, started_at, error_summary
    from jobs
    where status in ('queued', 'running', 'failed')
    order by requested_at asc;
  ")
  if ($pending | is-empty) {
    return (['queue' '  (empty)' ''] | str join "\n")
  }
  let rows = ($pending | each {|job|
    $"  ($job.job_id) note=($job.note_id) [($job.status)] ($job.operation) requested=($job.requested_at) error=($job.error_summary? | default '')"
  })
  (['queue'] ++ $rows ++ ['']) | str join "\n"
}
# Status CLI entry point. Exactly one report is chosen, in flag-priority
# order: --queue, --failed, --deleted, --conflicts, a specific note id,
# otherwise the overall summary.
def main [note_id?: string, --failed, --queue, --deleted, --conflicts] {
  ensure-layout
  let report = if $queue {
    format-queue
  } else if $failed {
    format-filtered 'failed' 'failed notes'
  } else if $deleted {
    format-filtered 'source_deleted' 'deleted notes'
  } else if $conflicts {
    format-filtered 'conflict' 'conflict notes'
  } else if $note_id != null {
    format-note $note_id
  } else {
    format-summary
  }
  print $report
}

View File

@@ -1,58 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
use ./reconcile.nu [reconcile-run]
use ./worker.nu [worker-run]
# Extract a human-readable message from a caught error record, falling back
# to its nuon rendering when no message is present.
def error-message [error: any] {
  let text = (($error.msg? | default '') | into string)
  if $text == '' { $error | to nuon } else { $text }
}
# Drain the job queue once; a worker failure is reported but never fatal,
# so the watch loop keeps running.
def run-worker [] {
  try {
    worker-run --drain
  } catch {|err|
    print $"worker failed: (error-message $err)"
  }
}
# One full sync cycle: drain any leftover jobs, reconcile the WebDAV tree
# against the database, then drain whatever reconcile enqueued.
# A reconcile failure aborts the cycle (no second drain).
def run-sync [] {
  run-worker
  try {
    reconcile-run
  } catch {|err|
    print $"reconcile failed: (error-message $err)"
    return
  }
  run-worker
}
# Watch loop entry point: perform an initial sync, then re-sync on every
# relevant filesystem event under the WebDAV root.
def main [] {
  ensure-layout
  let root = (webdav-root)
  print $"Watching ($root) for Notability WebDAV updates"
  run-sync
  # inotifywait streams one changed path per line, forever.
  ^inotifywait -m -r --format '%w%f' -e create -e close_write -e moved_to -e moved_from -e delete -e attrib $root
  | lines
  | each {|changed_path|
    # Ignore paths that are not supported Notability sources.
    if (is-supported-source-path $changed_path) {
      print $"Filesystem event for ($changed_path)"
      run-sync
    }
  }
}

View File

@@ -1,36 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
# Serve the Notability WebDAV root via rclone.
#
# Environment:
#   NOTABILITY_WEBDAV_ADDR           listen address (default 127.0.0.1:9980)
#   NOTABILITY_WEBDAV_USER           username (default notability)
#   NOTABILITY_WEBDAV_BASEURL        base URL prefix (default /)
#   NOTABILITY_WEBDAV_PASSWORD_FILE  required; file holding the password
def main [] {
  ensure-layout
  let root = (webdav-root)
  # Optional env access (`$env.NAME?`) replaces the repetitive
  # `'NAME' in ($env | columns)` membership checks of the original.
  let addr = ($env.NOTABILITY_WEBDAV_ADDR? | default '127.0.0.1:9980')
  let user = ($env.NOTABILITY_WEBDAV_USER? | default 'notability')
  let baseurl = ($env.NOTABILITY_WEBDAV_BASEURL? | default '/')
  let password_file = ($env.NOTABILITY_WEBDAV_PASSWORD_FILE? | default null)
  if $password_file == null {
    error make {
      msg: 'NOTABILITY_WEBDAV_PASSWORD_FILE is required'
    }
  }
  let password = (open --raw $password_file | str trim)
  print $"Starting WebDAV on ($addr), serving ($root), base URL ($baseurl)"
  # NOTE(review): the password is passed on the rclone command line and is
  # visible in the process table; consider `rclone serve webdav --htpasswd`.
  run-external rclone 'serve' 'webdav' $root '--addr' $addr '--baseurl' $baseurl '--user' $user '--pass' $password
}
}

View File

@@ -1,506 +0,0 @@
#!/usr/bin/env nu
use ./lib.nu *
# Wait at least this long after the last change before running `qmd update`.
const qmd_debounce = 1min
# Poll interval for the long-running worker loop while the queue is empty.
const idle_sleep = 10sec
# Vision-capable model used for both transcription and normalization.
const vision_model = 'openai-codex/gpt-5.4'
# Wall-clock limits handed to `timeout` for the two `pi` invocations.
const transcribe_timeout = '90s'
const normalize_timeout = '60s'
# Return the oldest queued job record, or null when the queue is empty.
# `first` errors on empty input in Nushell, so the original
# `sql-json … | first` crashed the worker on an empty queue instead of
# yielding the null the callers check for; guard with is-empty.
def next-queued-job [] {
  let rows = (sql-json "
    select job_id, note_id, operation, job_manifest_path, result_path, source_hash
    from jobs
    where status = 'queued'
    order by requested_at asc
    limit 1;
  ")
  if ($rows | is-empty) { null } else { $rows | first }
}
# Run `qmd update` in the notes root, but only when the dirty marker exists
# and has been untouched for at least the debounce window. The marker is
# removed only after a successful update.
def maybe-update-qmd [] {
  let dirty_marker = (qmd-dirty-file)
  if not ($dirty_marker | path exists) {
    return
  }
  let last_touched = ((ls -l $dirty_marker | first).modified)
  if ((date now) - $last_touched) < $qmd_debounce {
    # Touched too recently: debounce and try again on a later pass.
    return
  }
  print 'Running qmd update'
  # `cd` inside `do` keeps the directory change scoped to this block.
  let outcome = (do {
    cd (notes-root)
    run-external qmd 'update' | complete
  })
  if $outcome.exit_code != 0 {
    print $"qmd update failed: ($outcome.stderr | str trim)"
    return
  }
  rm -f $dirty_marker
}
# Persist a job result record as pretty-printed JSON, creating parent
# directories as needed.
def write-result [result_path: path, payload: record] {
  mkdir ($result_path | path dirname)
  $payload | to json --indent 2 | save -f $result_path
}
# Extract a readable message from a caught error. Nushell reports external
# failures with the unhelpful generic text 'External command failed', so
# that case also falls back to the full nuon rendering.
def error-message [error: any] {
  let text = (($error.msg? | default '') | into string)
  if $text == '' or $text == 'External command failed' {
    $error | to nuon
  } else {
    $text
  }
}
# Strip one pair of surrounding double or single quotes from a value's
# string form; null becomes the empty string.
def unquote [value?: any] {
  if $value == null {
    return ''
  }
  $value
  | into string
  | str replace -r '^"(.*)"$' '$1'
  | str replace -r "^'(.*)'$" '$1'
}
# Lower-cased file extension of a path, defaulting to 'bin' when path parse
# yields no extension field.
def source-format [file: path] {
  let parsed = ([$file] | path parse | first)
  $parsed.extension? | default 'bin' | str downcase
}
# Derive a timestamped sibling path for conflict copies:
# <dir>/<stem>.conflict-<UTC-ish stamp>.<ext>
def conflict-path-for [output_path: path] {
  let parts = ([$output_path] | path parse | first)
  let stamp = ((date now) | format date '%Y-%m-%dT%H-%M-%SZ')
  let conflict_name = $"($parts.stem).conflict-($stamp).($parts.extension)"
  [$parts.parent $conflict_name] | path join
}
# Scan the notes root for Markdown files whose frontmatter marks them as
# generated by notability-ingest for the given note id. Hidden directories
# (any '/.' path segment) are skipped. Returns a sorted list of paths.
def find-managed-outputs [note_id: string] {
  let root = (notes-root)
  if not ($root | path exists) {
    return []
  }
  (glob $"($root)/**/*.md")
  | where {|file| not ($file | str contains '/.') }
  | where {|file|
    let frontmatter = (parse-output-frontmatter $file)
    let managed = (unquote ($frontmatter.managed_by? | default ''))
    let owner = (unquote ($frontmatter.note_id? | default ''))
    $managed == 'notability-ingest' and $owner == $note_id
  }
  | sort
}
# Resolve where this note's managed output actually lives. Prefers the
# configured path when its frontmatter claims ownership; otherwise scans
# for a file the note may have been moved/renamed to. Zero matches falls
# back to the configured path; more than one is an error.
def resolve-managed-output-path [note_id: string, configured_output_path: path] {
  if ($configured_output_path | path exists) {
    let frontmatter = (parse-output-frontmatter $configured_output_path)
    let is_ours = (
      (unquote ($frontmatter.managed_by? | default '')) == 'notability-ingest'
      and (unquote ($frontmatter.note_id? | default '')) == $note_id
    )
    if $is_ours {
      return $configured_output_path
    }
  }
  let candidates = (find-managed-outputs $note_id)
  match ($candidates | length) {
    0 => $configured_output_path
    1 => ($candidates | first)
    _ => {
      error make {
        msg: $"Multiple managed note files found for ($note_id): (($candidates | str join ', '))"
      }
    }
  }
}
# Decide where the generated Markdown may be written:
#   create    — output path does not exist yet
#   overwrite — existing file is ours (frontmatter ownership matches)
#   conflict  — existing file belongs to someone else; write beside it
def determine-write-target [manifest: record] {
  let output_path = (resolve-managed-output-path $manifest.note_id $manifest.output_path)
  if not ($output_path | path exists) {
    return {
      output_path: $output_path
      write_path: $output_path
      write_mode: 'create'
      updated_main_output: true
    }
  }
  let frontmatter = (parse-output-frontmatter $output_path)
  let owned = (
    (unquote ($frontmatter.managed_by? | default '')) == 'notability-ingest'
    and (unquote ($frontmatter.note_id? | default '')) == $manifest.note_id
  )
  if $owned {
    return {
      output_path: $output_path
      write_path: $output_path
      write_mode: 'overwrite'
      updated_main_output: true
    }
  }
  # Not ours: never clobber a user file — divert to a conflict sibling.
  {
    output_path: $output_path
    write_path: (conflict-path-for $output_path)
    write_mode: 'conflict'
    updated_main_output: false
  }
}
# Assemble the final Markdown note: YAML frontmatter followed by the
# normalized body. An empty body falls back to a bare H1 with the title so
# the output file is never contentless.
def build-markdown [manifest: record, normalized: string] {
  let body = ($normalized | str trim)
  let output_body = if $body == '' {
    $"# ($manifest.title)"
  } else {
    $body
  }
  # assumes requested_at is ISO-8601, so chars 0..9 are YYYY-MM-DD — TODO confirm
  let created = ($manifest.requested_at | str substring 0..9)
  let updated = ((date now) | format date '%Y-%m-%d')
  # `to json` quotes/escapes each value so the frontmatter stays valid YAML.
  [
    '---'
    $"title: ($manifest.title | to json)"
    $"created: ($created | to json)"
    $"updated: ($updated | to json)"
    'source: "notability"'
    $"source_transport: (($manifest.source_transport? | default 'webdav') | to json)"
    $"source_relpath: ($manifest.source_relpath | to json)"
    $"note_id: ($manifest.note_id | to json)"
    'managed_by: "notability-ingest"'
    $"source_file: ($manifest.archive_path | to json)"
    $"source_file_hash: ($'sha256:($manifest.source_hash)' | to json)"
    $"source_format: ((source-format $manifest.archive_path) | to json)"
    'status: "active"'
    'tags:'
    '  - handwritten'
    '  - notability'
    '---'
    ''
    $output_body
    ''
  ] | str join "\n"
}
# Turn a Notability export into a list of PNG page images. PNG inputs pass
# through unchanged; PDFs are rasterized with pdftoppm into a per-job
# directory. Any other format is rejected.
def render-pages [input_path: path, job_id: string] {
  let extension = (([$input_path] | path parse | first).extension? | default '' | str downcase)
  match $extension {
    'png' => [ $input_path ]
    'pdf' => {
      let render_dir = ([(render-root) $job_id] | path join)
      mkdir $render_dir
      let prefix = ([$render_dir 'page'] | path join)
      # 200 dpi; pdftoppm emits <prefix>-N.png per page.
      ^pdftoppm -png -r 200 $input_path $prefix
      let pages = ((glob $"($render_dir)/*.png") | sort)
      if ($pages | is-empty) {
        error make {
          msg: $"No PNG pages rendered from ($input_path)"
        }
      }
      $pages
    }
    _ => {
      error make {
        msg: $"Unsupported Notability input format: ($input_path)"
      }
    }
  }
}
# Invoke the `pi` CLI with a prompt plus attachment inputs under a hard
# `timeout` window. Returns trimmed stdout; raises when pi produced none.
# The prompt is written to a temp file and passed by @-reference; the file
# is removed on both the success and failure paths.
def call-pi [timeout_window: string, prompt: string, inputs: list<path>, thinking: string] {
  let prompt_file = (^mktemp --suffix '.md' | str trim)
  $prompt | save -f $prompt_file
  # Each input path becomes an @file reference on the pi command line.
  let input_refs = ($inputs | each {|input| $'@($input)' })
  let prompt_ref = $'@($prompt_file)'
  let result = (try {
    ^timeout $timeout_window pi --model $vision_model --thinking $thinking --no-tools --no-session -p ...$input_refs $prompt_ref | complete
  } catch {|error|
    # Clean up the temp prompt before re-raising the spawn failure.
    rm -f $prompt_file
    error make {
      msg: (error-message $error)
    }
  })
  rm -f $prompt_file
  let output = ($result.stdout | str trim)
  if $output != '' {
    $output
  } else {
    # No stdout: report exit code, plus stderr when available.
    let stderr = ($result.stderr | str trim)
    if $stderr == '' {
      error make {
        msg: $"pi returned no output (exit ($result.exit_code))"
      }
    } else {
      error make {
        msg: $"pi returned no output (exit ($result.exit_code)): ($stderr)"
      }
    }
  }
}
# Execute one ingest job end to end:
#   render pages -> transcribe (vision pass) -> normalize (text pass)
#   -> build frontmatter+body -> write to the resolved target.
# Returns the result record persisted by the worker.
def ingest-job [manifest: record] {
  mkdir $manifest.session_dir
  let page_paths = (render-pages $manifest.input_path $manifest.job_id)
  let transcribe_prompt = ([
    'Transcribe this note into clean Markdown.'
    ''
    'Read it like a human and reconstruct the intended reading order and structure.'
    ''
    'Do not preserve handwritten layout literally.'
    ''
    'Handwritten line breaks, word stacking, font size changes, and spacing are not semantic structure by default.'
    ''
    'If adjacent handwritten lines clearly belong to one sentence or short phrase, merge them into normal prose with spaces instead of separate Markdown lines.'
    ''
    'Only keep separate lines or blank lines when there is clear evidence of separate paragraphs, headings, list items, checkboxes, or other distinct blocks.'
    ''
    'Keep headings, lists, and paragraphs when they are genuinely present.'
    ''
    'Do not summarize. Do not add commentary. Return Markdown only.'
  ] | str join "\n")
  print $"Transcribing ($manifest.job_id) with page count ($page_paths | length)"
  # Vision pass over the rendered page images, with low thinking effort.
  let transcript = (call-pi $transcribe_timeout $transcribe_prompt $page_paths 'low')
  mkdir ($manifest.transcript_path | path dirname)
  $"($transcript)\n" | save -f $manifest.transcript_path
  let normalize_prompt = ([
    'Rewrite the attached transcription into clean Markdown.'
    ''
    'Preserve the same content and intended structure.'
    ''
    'Collapse layout-only line breaks from handwriting.'
    ''
    'If short adjacent lines are really one sentence or phrase, join them with spaces instead of keeping one line per handwritten row.'
    ''
    'Use separate lines only for real headings, list items, checkboxes, or distinct paragraphs.'
    ''
    'Do not summarize. Return Markdown only.'
  ] | str join "\n")
  print $"Normalizing ($manifest.job_id)"
  # Text-only cleanup pass over the saved transcript; thinking disabled.
  let normalized = (call-pi $normalize_timeout $normalize_prompt [ $manifest.transcript_path ] 'off')
  let markdown = (build-markdown $manifest $normalized)
  # Pick create/overwrite/conflict target depending on file ownership.
  let target = (determine-write-target $manifest)
  mkdir ($target.write_path | path dirname)
  $markdown | save -f $target.write_path
  {
    success: true
    job_id: $manifest.job_id
    note_id: $manifest.note_id
    archive_path: $manifest.archive_path
    source_hash: $manifest.source_hash
    session_dir: $manifest.session_dir
    output_path: $target.output_path
    # output_hash is only meaningful when the main output was replaced.
    output_hash: (if $target.updated_main_output { sha256 $target.write_path } else { null })
    conflict_path: (if $target.write_mode == 'conflict' { $target.write_path } else { null })
    write_mode: $target.write_mode
    updated_main_output: $target.updated_main_output
    transcript_path: $manifest.transcript_path
  }
}
# Record a job failure: flip the job and its note to 'failed' in SQL, mark
# the archived version's ingest_result when a partial result exists, move
# the manifest into the 'failed' state directory, and log the event.
def mark-failure [job: record, running_path: string, error_summary: string, result?: any] {
  let finished_at = (now-iso)
  sql-run $"
    update jobs
    set status = 'failed',
    finished_at = (sql-quote $finished_at),
    error_summary = (sql-quote $error_summary),
    job_manifest_path = (sql-quote (manifest-path-for $job.job_id 'failed'))
    where job_id = (sql-quote $job.job_id);
    update notes
    set status = 'failed',
    last_error = (sql-quote $error_summary)
    where note_id = (sql-quote $job.note_id);
  "
  | ignore
  # Only versions rows with a known archive_path can be annotated.
  if $result != null and ($result.archive_path? | default null) != null {
    sql-run $"
      update versions
      set ingest_result = 'failed',
      session_path = (sql-quote ($result.session_dir? | default ''))
      where archive_path = (sql-quote $result.archive_path);
    "
    | ignore
  }
  # Move the on-disk manifest out of 'running' so recovery won't re-fail it.
  let failed_path = (manifest-path-for $job.job_id 'failed')
  if ($running_path | path exists) {
    mv -f $running_path $failed_path
  }
  log-event $job.note_id 'job-failed' {
    job_id: $job.job_id
    error: $error_summary
  }
}
# Record a successful job: mark job done, update the note row (status
# becomes 'conflict' when the output was diverted), annotate the archived
# version, move the manifest to 'done', and flag qmd for reindexing.
def mark-success [job: record, running_path: string, result: record] {
  let finished_at = (now-iso)
  let note_status = if ($result.write_mode? | default 'write') == 'conflict' {
    'conflict'
  } else {
    'active'
  }
  let output_path_q = (sql-quote ($result.output_path? | default null))
  # Trick: when the main output was NOT updated, assign the column to its
  # own name (e.g. `last_generated_output_hash = last_generated_output_hash`)
  # so the previous value is preserved by the UPDATE.
  let output_hash_update = if ($result.updated_main_output? | default false) {
    sql-quote ($result.output_hash? | default null)
  } else {
    'last_generated_output_hash'
  }
  let source_hash_update = if ($result.updated_main_output? | default false) {
    sql-quote ($result.source_hash? | default null)
  } else {
    'last_generated_source_hash'
  }
  sql-run $"
    update jobs
    set status = 'done',
    finished_at = (sql-quote $finished_at),
    error_summary = null,
    job_manifest_path = (sql-quote (manifest-path-for $job.job_id 'done'))
    where job_id = (sql-quote $job.job_id);
    update notes
    set status = (sql-quote $note_status),
    output_path = ($output_path_q),
    last_processed_at = (sql-quote $finished_at),
    last_generated_output_hash = ($output_hash_update),
    last_generated_source_hash = ($source_hash_update),
    conflict_path = (sql-quote ($result.conflict_path? | default null)),
    last_error = null
    where note_id = (sql-quote $job.note_id);
    update versions
    set ingest_result = 'success',
    session_path = (sql-quote ($result.session_dir? | default ''))
    where archive_path = (sql-quote $result.archive_path);
  "
  | ignore
  let done_path = (manifest-path-for $job.job_id 'done')
  if ($running_path | path exists) {
    mv -f $running_path $done_path
  }
  # Touch the dirty marker so maybe-update-qmd reindexes after the debounce.
  ^touch (qmd-dirty-file)
  log-event $job.note_id 'job-finished' {
    job_id: $job.job_id
    write_mode: ($result.write_mode? | default 'write')
    output_path: ($result.output_path? | default '')
    conflict_path: ($result.conflict_path? | default '')
  }
}
# Startup recovery: any job still marked 'running' was interrupted by a
# previous worker crash/restart, so fail it explicitly (attaching a partial
# result file when one was written).
def recover-running-jobs [] {
  let stale = (sql-json "
    select job_id, note_id, job_manifest_path, result_path
    from jobs
    where status = 'running'
    order by started_at asc;
  ")
  $stale
  | each {|job|
    let running_path = (manifest-path-for $job.job_id 'running')
    let partial = if ($job.result_path | path exists) {
      open $job.result_path
    } else {
      null
    }
    mark-failure $job $running_path 'worker interrupted before completion' $partial
  }
  | ignore
}
# Run one job: move its manifest to 'running', mark it running in SQL,
# execute the ingest, then record success or failure. The result record is
# always written to result_path, even on error, so recovery can inspect it.
def process-job [job: record] {
  let running_path = (manifest-path-for $job.job_id 'running')
  # Move the manifest first so a crash here is detected by recovery.
  mv -f $job.job_manifest_path $running_path
  sql-run $"
    update jobs
    set status = 'running',
    started_at = (sql-quote (now-iso)),
    job_manifest_path = (sql-quote $running_path)
    where job_id = (sql-quote $job.job_id);
  "
  | ignore
  print $"Processing ($job.job_id) for ($job.note_id)"
  let manifest = (open $running_path)
  try {
    let result = (ingest-job $manifest)
    write-result $job.result_path $result
    mark-success $job $running_path $result
  } catch {|error|
    let message = (error-message $error)
    # Persist a failure result so the error context survives the process.
    let result = {
      success: false
      job_id: $manifest.job_id
      note_id: $manifest.note_id
      archive_path: $manifest.archive_path
      source_hash: $manifest.source_hash
      session_dir: $manifest.session_dir
      error: $message
    }
    write-result $job.result_path $result
    mark-failure $job $running_path $message $result
  }
}
# Process queued jobs until the queue is empty. maybe-update-qmd runs after
# every job and once more when the queue drains.
def drain-queued-jobs [] {
  mut more = true
  while $more {
    let job = (next-queued-job)
    if $job == null {
      $more = false
    } else {
      process-job $job
    }
    maybe-update-qmd
  }
}
# Worker entry point. Always recovers interrupted jobs first. With --drain
# it empties the queue once and returns; otherwise it polls forever,
# sleeping while idle.
export def worker-run [--drain] {
  ensure-layout
  recover-running-jobs
  if $drain {
    drain-queued-jobs
    return
  }
  loop {
    let job = (next-queued-job)
    if $job == null {
      maybe-update-qmd
      sleep $idle_sleep
    } else {
      process-job $job
      maybe-update-qmd
    }
  }
}
# Thin CLI wrapper around the exported worker entry point.
def main [--drain] {
  if $drain {
    worker-run --drain
  } else {
    worker-run
  }
}

View File

@@ -1,26 +0,0 @@
I will provide you with the content and title of a document. Your task is to select appropriate tags for the document from the available list.
Only select tags that appear in the provided list; never invent new tags or modify existing ones.
Rules:
1. Focus on WHAT the document IS (document type) and what TOPIC it relates to — not on incidental details mentioned in the content.
- GOOD tags for a server hosting invoice: "Invoice", "Hosting"
- BAD tags for a server hosting invoice: "IBAN", "VAT", "Bank account" — these are just details that appear on any invoice.
2. Pick 1-4 tags maximum. Fewer is better. Every tag must add distinct, meaningful categorisation value.
3. All tags must be in English.
4. Never tag based on formatting details, payment methods, reference numbers, or boilerplate text.
The content is likely in {{.Language}}, but tags must always be in English.
<available_tags>
{{.AvailableTags | join ", "}}
</available_tags>
<title>
{{.Title}}
</title>
<content>
{{.Content}}
</content>
Respond only with the selected tags as a comma-separated list, without any additional information.

View File

@@ -1,26 +0,0 @@
I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
Your task is to generate a clear, consistent document title for use in paperless-ngx.
Title format: "YYYY-MM-DD - Sender - Description"
- YYYY-MM-DD: The document date (issue date, statement date, etc.). Use the most specific date available. If no date is found, omit the date and its trailing " - " separator, so the title starts with the sender.
- Sender: The company, organisation, or person who sent/issued the document. Use their common short name (e.g. "Hetzner" not "Hetzner Online GmbH").
- Description: A brief description of what the document is (e.g. "Server hosting invoice", "Payslip January", "Employment contract", "Tax assessment 2024"). Keep it concise but specific enough to distinguish from similar documents.
Examples:
- "2025-03-01 - Hetzner - Server hosting invoice"
- "2024-12-15 - Techniker Krankenkasse - Health insurance statement"
- "2024-06-30 - Acme Corp - Payslip June"
- "2024-01-10 - Finanzamt Berlin - Tax assessment 2023"
Rules:
1. Always write the title in English, regardless of the document language.
2. Keep the description part under 6 words.
3. If the original title contains useful information, use it to inform your suggestion.
4. Respond only with the title, without any additional information.
The content is likely in {{.Language}}.
<original_title>{{.Title}}</original_title>
<content>
{{.Content}}
</content>

View File

@@ -1,87 +0,0 @@
{config, ...}: let
  # Single source of truth for values repeated below (ports appeared as
  # duplicated literals in the Caddy vhost, the container port mapping, and
  # the redis connection string).
  paperlessGptPort = 8081;
  redisPort = config.services.redis.servers.paperless.port;
in {
  services.caddy = {
    enable = true;
    enableReload = false;
    globalConfig = ''
      admin off
    '';
    # Paperless web UI, TLS-terminated with a tailnet certificate.
    virtualHosts."docs.manticore-hippocampus.ts.net" = {
      extraConfig = ''
        tls {
          get_certificate tailscale
        }
        reverse_proxy localhost:${toString config.services.paperless.port}
      '';
    };
    # paperless-gpt companion UI (container below).
    virtualHosts."docs-ai.manticore-hippocampus.ts.net" = {
      extraConfig = ''
        tls {
          get_certificate tailscale
        }
        reverse_proxy localhost:${toString paperlessGptPort}
      '';
    };
  };
  virtualisation.oci-containers = {
    backend = "docker";
    containers.paperless-gpt = {
      # NOTE(review): `latest` is a mutable tag; consider pinning a version
      # or digest for reproducible deploys.
      image = "icereed/paperless-gpt:latest";
      autoStart = true;
      # Bind only to loopback; Caddy fronts it on the tailnet.
      ports = [
        "127.0.0.1:${toString paperlessGptPort}:8080"
      ];
      volumes = [
        "paperless-gpt-data:/app/data"
        "paperless-gpt-prompts:/app/prompts"
        # Custom prompt templates, mounted read-only over the defaults.
        "${./paperless-gpt-prompts/tag_prompt.tmpl}:/app/prompts/tag_prompt.tmpl:ro"
        "${./paperless-gpt-prompts/title_prompt.tmpl}:/app/prompts/title_prompt.tmpl:ro"
      ];
      environment = {
        # Reaches the host's paperless via the docker host-gateway alias.
        PAPERLESS_BASE_URL = "http://host.docker.internal:${toString config.services.paperless.port}";
        LLM_PROVIDER = "openai";
        LLM_MODEL = "gpt-5.4";
        LLM_LANGUAGE = "German";
        VISION_LLM_PROVIDER = "openai";
        VISION_LLM_MODEL = "gpt-5.4";
        LOG_LEVEL = "info";
      };
      # API tokens / credentials come from sops, not the nix store.
      environmentFiles = [
        config.sops.secrets.tahani-paperless-gpt-env.path
      ];
      extraOptions = [
        "--add-host=host.docker.internal:host-gateway"
      ];
    };
  };
  services.redis.servers.paperless = {
    enable = true;
    port = 6379;
    bind = "127.0.0.1";
    settings = {
      maxmemory = "256mb";
      maxmemory-policy = "allkeys-lru";
    };
  };
  services.paperless = {
    enable = true;
    # NOTE(review): 0.0.0.0 is required so the paperless-gpt container can
    # reach it via host.docker.internal, but it also exposes paperless on
    # every interface — confirm the host firewall restricts access.
    address = "0.0.0.0";
    consumptionDir = "/var/lib/paperless/consume";
    passwordFile = config.sops.secrets.tahani-paperless-password.path;
    settings = {
      PAPERLESS_DBENGINE = "sqlite";
      # Reference the redis server defined above instead of repeating 6379.
      PAPERLESS_REDIS = "redis://127.0.0.1:${toString redisPort}";
      PAPERLESS_CONSUMER_IGNORE_PATTERN = [
        ".DS_STORE/*"
        "desktop.ini"
      ];
      PAPERLESS_CONSUMER_POLLING = 30;
      PAPERLESS_CONSUMER_RECURSIVE = true;
      PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS = true;
      PAPERLESS_OCR_LANGUAGE = "deu+eng";
      PAPERLESS_CSRF_TRUSTED_ORIGINS = "https://docs.manticore-hippocampus.ts.net";
    };
  };
}