Purpose: Provides the database foundation for storing line items with vector embeddings for semantic search comparison.
Output: Running Postgres database with line_item table ready for data import and embedding storage.
<execution_context> @./.claude/get-shit-done/workflows/execute-plan.md @./.claude/get-shit-done/templates/summary.md </execution_context>
services:
db:
image: pgvector/pgvector:pg18
container_name: semantic-search-db
ports:
- "5433:5432"
environment:
POSTGRES_USER: dev
POSTGRES_PASSWORD: dev
POSTGRES_DB: semantic_search
volumes:
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
- pgvector_data:/var/lib/postgresql/18/docker
healthcheck:
test: ["CMD-SHELL", "pg_isready -U dev -d semantic_search"]
interval: 5s
timeout: 5s
retries: 5
volumes:
pgvector_data:
Use port 5433 to avoid conflicts with local Postgres installations.
Use Postgres 18 data path /var/lib/postgresql/18/docker (changed from /data in Postgres 18).
-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Line item table matching Orcha's booking_history_item structure
CREATE TABLE line_item (
id BIGSERIAL PRIMARY KEY,
-- Original values
supplier_name TEXT NOT NULL,
description TEXT NOT NULL,
debit_account TEXT NOT NULL,
credit_account TEXT,
cost_center TEXT,
net_amount NUMERIC,
-- Normalized values (for matching)
supplier_name_normalized TEXT NOT NULL,
description_normalized TEXT NOT NULL,
-- Embedding columns (populated in Phase 2, defined now to avoid migrations)
embedding_google vector(768), -- text-multilingual-embedding-002
embedding_jina vector(1024), -- jina-embeddings-v3
embedding_minilm vector(384), -- all-MiniLM-L6-v2
-- Metadata
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Note: HNSW indexes created in Phase 2 after data load for performance
Per user decision: Include nullable embedding columns from start to avoid migrations. Vector dimensions per research: Google=768, Jina=1024, MiniLM=384.
docker compose up -ddocker compose ps (should show healthy)docker compose exec db psql -U dev -d semantic_search -c "SELECT extversion FROM pg_extension WHERE extname = 'vector';"docker compose exec db psql -U dev -d semantic_search -c "\d line_item"docker compose exec db psql -U dev -d semantic_search -c "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'line_item' AND data_type = 'USER-DEFINED';"Stop container after verification (will be started again in Plan 02): docker compose down
<success_criteria>