Purpose: Complete the search implementation phase by adding Orcha-style LLM matching (using Gemini Flash with historical booking context) and a minimal web interface that displays results from all 4 search approaches side-by-side.
Output: LLM matching module and Flask app with search form and results display.
<execution_context> @./.claude/get-shit-done/workflows/execute-plan.md @./.claude/get-shit-done/templates/summary.md </execution_context>
@.planning/phases/03-search-implementation/03-01-SUMMARY.md
@src/db.py @src/normalize.py
fetch_booking_history(conn, supplier_name: str, limit: int = 50) -> list[dict]:
Filter with the pg_trgm `%` operator at threshold 0.7 (matching Orcha exactly per user decision), ordered by similarity(supplier_name_normalized, %s) DESC.
format_as_csv(data: list[dict]) -> str:
build_matching_prompt(query_text: str, historical_bookings: list[dict]) -> str:
call_gemini_flash(prompt: str) -> str:
json if present)
llm_context_match(conn, query_text: str) -> dict:
Load .env for GOOGLE_API_KEY (different from Vertex AI credentials).
IMPORTANT: For Gemini via API key (not Vertex AI):
genai.Client()
Update src/search/__init__.py to export llm_context_match.
# Apply pg_trgm extension
docker exec -i semantic-search-db psql -U dev -d semantic_search -c "CREATE EXTENSION IF NOT EXISTS pg_trgm;"
docker exec -i semantic-search-db psql -U dev -d semantic_search -c "CREATE INDEX IF NOT EXISTS idx_supplier_trgm ON line_item USING gin (supplier_name_normalized gin_trgm_ops);"
# Test fetch_booking_history
python -c "
from src.db import get_connection
from src.search.llm_matching import fetch_booking_history
conn = get_connection()
results = fetch_booking_history(conn, 'AMAZON')
print(f'Found {len(results)} historical bookings for AMAZON')
assert len(results) > 0
conn.close()
"
Create src/templates/ directory
Create src/app.py with Flask application:
from flask import Flask, render_template, request
from concurrent.futures import ThreadPoolExecutor, as_completed
from src.db import get_connection
from src.search import search_all_models
from src.search.llm_matching import llm_context_match
app = Flask(__name__, template_folder='templates')
@app.route('/', methods=['GET', 'POST'])
def search():
    """Render the search form and, on POST, the results of every search.

    On GET the empty form is rendered. On POST the ``query`` and ``k``
    form fields are read; when the stripped query is non-empty, a database
    connection is opened, ``execute_parallel_search`` is run, and the
    connection is always closed afterwards.

    Returns:
        The rendered ``search.html`` template, given ``query``, ``k`` and
        ``results`` (``None`` when no search was executed).
    """
    default_k = 5
    results = None
    query = ''
    k = default_k

    if request.method == 'POST':
        query = request.form.get('query', '').strip()

        # The form value is user-controlled: a non-numeric ``k`` must not
        # surface as an unhandled ValueError (HTTP 500).
        try:
            k = int(request.form.get('k', default_k))
        except (TypeError, ValueError):
            k = default_k
        # A non-positive result count is meaningless; use the default.
        if k < 1:
            k = default_k

        if query:
            conn = get_connection()
            try:
                # Execute all searches in parallel
                results = execute_parallel_search(conn, query, k)
            finally:
                conn.close()

    return render_template('search.html', query=query, k=k, results=results)
def execute_parallel_search(conn, query: str, k: int) -> dict:
    """Execute pgvector and LLM searches in parallel.

    Args:
        conn: Database connection handed to both searches.
        query: Search text passed to ``search_all_models`` and
            ``llm_context_match``.
        k: Number of results requested from ``search_all_models``.

    Returns:
        A dict holding the entries returned by ``search_all_models``
        (merged in at the top level; google, jina and minilm keys according
        to the plan) plus an ``'llm'`` entry. If the LLM search raises,
        ``'llm'`` is ``{'error': <message>}``. If the pgvector search
        raises, the failure is logged and its keys are simply absent.
    """
    import logging

    # NOTE(review): both workers use the same ``conn`` concurrently;
    # confirm the database driver permits sharing one connection across
    # threads.
    results = {}

    def search_pgvector_all():
        # Uses search_all_models from Plan 01
        return search_all_models(conn, query, k)

    def search_llm():
        return llm_context_match(conn, query)

    with ThreadPoolExecutor(max_workers=2) as executor:
        # Remember which search each future belongs to *before* resolving
        # it, so the label is still known when ``future.result()`` raises.
        labels = {
            executor.submit(search_pgvector_all): 'pgvector',
            executor.submit(search_llm): 'llm',
        }

        for future in as_completed(labels):
            key = labels[future]
            try:
                result = future.result()
            except Exception as e:
                # Handle individual search failures gracefully
                if key == 'llm':
                    results['llm'] = {'error': str(e)}
                else:
                    logging.getLogger(__name__).exception(
                        '%s search failed', key
                    )
                continue

            if key == 'pgvector':
                results.update(result)  # Adds google, jina, minilm keys
            else:
                results[key] = result

    return results
if __name__ == '__main__':
    # Script entry point: start Flask's built-in server on port 5000 with
    # debug mode switched on.
    app.run(port=5000, debug=True)
Keep styles minimal but functional:
# Start Flask app and verify it runs
cd /home/volrath/code/worktrees-orcha-semantic-search/spikes/semantic-search
python -c "from src.app import app; print('Flask app imports successfully')"
# Test search endpoint manually
python -c "
from src.app import app

with app.test_client() as client:
    # GET request should show form
    response = client.get('/')
    assert response.status_code == 200
    assert b'Search' in response.data

    # POST request with query
    response = client.post('/', data={'query': 'AMAZON | Office supplies', 'k': '3'})
    assert response.status_code == 200

    print('Flask app responding correctly')
"
Start the Flask development server:
cd /home/volrath/code/worktrees-orcha-semantic-search/spikes/semantic-search
python -m src.app
Test with sample queries from the test set:
Verify for each query:
Test edge cases:
Create a simple test script at src/test_search.py for automated verification:
"""Quick test script for search functionality."""
from src.db import get_connection
from src.search import search_all_models
from src.search.llm_matching import llm_context_match
def test_search():
    """Smoke-test the pgvector search and the LLM matcher.

    Opens one database connection, checks that ``search_all_models``
    returns ``google``, ``jina`` and ``minilm`` entries (three results for
    ``google`` when ``k=3``), and that ``llm_context_match`` returns either
    a ``debit_account`` prediction or an ``error`` entry.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    conn = get_connection()
    # Close the connection even when an assertion fails, so a failing run
    # does not leak it.
    try:
        # Test pgvector search
        results = search_all_models(conn, "AMAZON | Office supplies", k=3)
        assert 'google' in results
        assert 'jina' in results
        assert 'minilm' in results
        assert len(results['google']) == 3

        # Test LLM matching
        llm_result = llm_context_match(conn, "AMAZON | Office supplies")
        assert 'debit_account' in llm_result or 'error' in llm_result
    finally:
        conn.close()

    print("All search tests passed!")
if __name__ == "__main__":
    # Allows running the checks directly via ``python -m src.test_search``.
    test_search()
Run: python -m src.test_search
python -m src.test_search
Expected output: "All search tests passed!"
LLM matching returns predictions:
from src.search.llm_matching import llm_context_match
from src.db import get_connection
conn = get_connection()
result = llm_context_match(conn, "AMAZON | supplies")
print(result) # Should have debit_account, cost_center
Flask app accessible at http://localhost:5000:
Parallel execution (4 searches complete faster than sequential):
<success_criteria>