From 7045dee36edc92a17deb678e0bd113ba74533c2f Mon Sep 17 00:00:00 2001 From: Christoph Schmatzler Date: Fri, 13 Mar 2026 18:14:50 +0000 Subject: [PATCH] opus is dumb --- .../paperless-gpt-prompts/tag_prompt.tmpl | 26 +++++++++++++++++++ .../paperless-gpt-prompts/title_prompt.tmpl | 26 +++++++++++++++++++ modules/_hosts/tahani/paperless.nix | 2 ++ 3 files changed, 54 insertions(+) create mode 100644 modules/_hosts/tahani/paperless-gpt-prompts/tag_prompt.tmpl create mode 100644 modules/_hosts/tahani/paperless-gpt-prompts/title_prompt.tmpl diff --git a/modules/_hosts/tahani/paperless-gpt-prompts/tag_prompt.tmpl b/modules/_hosts/tahani/paperless-gpt-prompts/tag_prompt.tmpl new file mode 100644 index 0000000..63f8911 --- /dev/null +++ b/modules/_hosts/tahani/paperless-gpt-prompts/tag_prompt.tmpl @@ -0,0 +1,26 @@ +I will provide you with the content and title of a document. Your task is to select appropriate tags for the document from the available list. +Only select tags from the provided list. + +Rules: +1. Focus on WHAT the document IS (document type) and what TOPIC it relates to — not on incidental details mentioned in the content. + - GOOD tags for a server hosting invoice: "Invoice", "Hosting" + - BAD tags for a server hosting invoice: "IBAN", "VAT", "Bank account" — these are just details that appear on any invoice. +2. Pick 1-4 tags maximum. Fewer is better. Every tag must add distinct, meaningful categorisation value. +3. All tags must be in English. +4. Never tag based on formatting details, payment methods, reference numbers, or boilerplate text. + +The content is likely in {{.Language}}, but tags must always be in English. + +<available_tags> +{{.AvailableTags | join ", "}} +</available_tags> + +<title> +{{.Title}} +</title> + +<content> +{{.Content}} +</content> + +Respond only with the selected tags as a comma-separated list, without any additional information. 
diff --git a/modules/_hosts/tahani/paperless-gpt-prompts/title_prompt.tmpl b/modules/_hosts/tahani/paperless-gpt-prompts/title_prompt.tmpl new file mode 100644 index 0000000..2c661b2 --- /dev/null +++ b/modules/_hosts/tahani/paperless-gpt-prompts/title_prompt.tmpl @@ -0,0 +1,26 @@ +I will provide you with the content of a document that has been partially read by OCR (so it may contain errors). +Your task is to generate a clear, consistent document title for use in paperless-ngx. + +Title format: "YYYY-MM-DD - Sender - Description" +- YYYY-MM-DD: The document date (issue date, statement date, etc.). Use the most specific date available. If no date is found, omit the date prefix. +- Sender: The company, organisation, or person who sent/issued the document. Use their common short name (e.g. "Hetzner" not "Hetzner Online GmbH"). +- Description: A brief description of what the document is (e.g. "Server hosting invoice", "Payslip January", "Employment contract", "Tax assessment 2024"). Keep it concise but specific enough to distinguish from similar documents. + +Examples: +- "2025-03-01 - Hetzner - Server hosting invoice" +- "2024-12-15 - Techniker Krankenkasse - Health insurance statement" +- "2024-06-30 - Acme Corp - Payslip June" +- "2024-01-10 - Finanzamt Berlin - Tax assessment 2023" + +Rules: +1. Always write the title in English, regardless of the document language. +2. Keep the description part under 6 words. +3. If the original title contains useful information, use it to inform your suggestion. +4. Respond only with the title, without any additional information. + +The content is likely in {{.Language}}. 
+ +<original_title>{{.Title}}</original_title> + +<content>{{.Content}}</content> + diff --git a/modules/_hosts/tahani/paperless.nix b/modules/_hosts/tahani/paperless.nix index baaf6a9..a8f83c7 100644 --- a/modules/_hosts/tahani/paperless.nix +++ b/modules/_hosts/tahani/paperless.nix @@ -34,6 +34,8 @@ volumes = [ "paperless-gpt-data:/app/data" "paperless-gpt-prompts:/app/prompts" + "${./paperless-gpt-prompts/tag_prompt.tmpl}:/app/prompts/tag_prompt.tmpl:ro" + "${./paperless-gpt-prompts/title_prompt.tmpl}:/app/prompts/title_prompt.tmpl:ro" ]; environment = { PAPERLESS_BASE_URL = "http://host.docker.internal:${toString config.services.paperless.port}";