diff --git a/app/api/configs.py b/app/api/configs.py index fc7ac40..752b838 100644 --- a/app/api/configs.py +++ b/app/api/configs.py @@ -9,8 +9,10 @@ returns metadata (no file contents are returned). """ from __future__ import annotations +import csv import glob import os +import xml.etree.ElementTree as ET from datetime import datetime from pathlib import Path from typing import Any @@ -153,23 +155,96 @@ async def update_config(reference: str, request: Request): ) +# ── Schema inference helpers ──────────────────────────────────────────────── + +_DATE_FORMATS = ["%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y%m%d", "%d-%m-%Y"] +_DATETIME_FORMATS = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"] + + +def _infer_type(values: list[str]) -> str: + sample = [v.strip() for v in values if v.strip()][:30] + if not sample: + return "str" + try: + [int(v.replace(",", "")) for v in sample] + return "int" + except ValueError: + pass + try: + [float(v.replace(",", "")) for v in sample] + return "float" + except ValueError: + pass + for fmt in _DATETIME_FORMATS: + try: + [datetime.strptime(v, fmt) for v in sample] + return f"datetime('{fmt}')" + except ValueError: + pass + for fmt in _DATE_FORMATS: + try: + [datetime.strptime(v, fmt) for v in sample] + return f"date('{fmt}')" + except ValueError: + pass + return "str" + + +def _csv_schema(path: str, delimiter: str = ",", has_header: bool = True) -> dict[str, str]: + try: + with open(path, newline="", encoding="utf-8", errors="replace") as f: + reader = csv.reader(f, delimiter=delimiter) + rows = [r for _, r in zip(range(31), reader)] + if not rows: + return {} + headers = rows[0] if has_header else [f"col_{i}" for i in range(len(rows[0]))] + data_rows = rows[1:] if has_header else rows + return { + col: _infer_type([r[i] for r in data_rows if i < len(r)]) + for i, col in enumerate(headers) + if col.strip() + } + except Exception: + return {} + + +def _xml_schema(path: str, xpathstr: str = "./*") -> dict[str, str]: + try: + tree = ET.parse(path) + root = tree.getroot() + elements = root.findall(xpathstr) or list(root) + if not elements: + return {} + schema: dict[str, str] = {} + for el in elements[:1]: + for child in el: + schema[child.tag] = "str" + for attr in el.attrib: + schema[f"@{attr}"] = "str" + return schema + except Exception: + return {} + + # ── Test URL ──────────────────────────────────────────────────────────────── @router.post("/test-url") async def test_url(request: Request): - """Return metadata for a URL pattern — no file contents ever returned.""" + """Return file/DB metadata and inferred schema — no row data ever returned.""" + from datetime import date as date_type + body = await request.json() url: str = body.get("url", "") as_at_date: str = body.get("as_at_date", datetime.now().strftime("%Y%m%d")) + csv_spec: dict = body.get("csv_spec", {}) + # ── DB URL — schema inspection wired up per-connector when available ────── if not url.startswith("file://"): - return {"type": "non-file", "message": "Only file:// URLs can be tested here. Database connections are validated at run time."} + scheme = url.split("://")[0] if "://" in url else url + return {"type": "db", "scheme": scheme, "schema": {}, "found": False} - # Resolve the path part (strip file://) + # ── file:// URL ────────────────────────────────────────────────────────── raw_path = url[7:] - - # Replace simple template variables for preview purposes - from datetime import date as date_type try: as_at = datetime.strptime(as_at_date, "%Y%m%d").date() except ValueError: @@ -183,40 +258,49 @@ async def test_url(request: Request): .replace("{{today.strftime('%Y-%m-%d')}}", date_type.today().strftime("%Y-%m-%d")) ) - # Treat as glob pattern for anything still containing { + def _file_info(p: str) -> dict: + try: + st = os.stat(p) + return {"path": p, "size_bytes": st.st_size, + "modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds")} + except OSError: + return {"path": p, "error": "Could not stat file"} + + def _schema_for(p: str) -> dict[str, str]: + pl = p.lower() + if pl.endswith(".xml"): + return _xml_schema(p) + delimiter = csv_spec.get("delimiter", ",") or "," + has_header = csv_spec.get("has_header", True) + return _csv_schema(p, delimiter=delimiter, has_header=has_header) + + # Glob pattern if "*" in resolved or "?" in resolved or "{" in resolved: - matches = glob.glob(resolved) + matches = sorted(glob.glob(resolved)) if not matches: - return {"type": "file", "resolved": resolved, "found": False, "message": "No files matched the pattern."} - file_infos = [] - for p in sorted(matches)[:10]: - try: - st = os.stat(p) - file_infos.append({ - "path": p, - "size_bytes": st.st_size, - "modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"), - }) - except OSError: - file_infos.append({"path": p, "error": "Could not stat file"}) - return {"type": "file", "resolved": resolved, "found": True, "matches": len(matches), "files": file_infos} + return {"type": "file", "resolved": resolved, "found": False, + "message": "No files matched the pattern.", "schema": {}} + schema = _schema_for(matches[0]) + return { + "type": "file", "resolved": resolved, "found": True, + "matches": len(matches), + "files": [_file_info(p) for p in matches[:10]], + "schema": schema, + } # Exact path p = Path(resolved) if not p.exists(): - return {"type": "file", "resolved": resolved, "found": False, "message": f"File not found: {resolved}"} + return {"type": "file", "resolved": resolved, "found": False, + "message": f"File not found: {resolved}", "schema": {}} try: - st = p.stat() + schema = _schema_for(str(p)) return { - "type": "file", - "resolved": resolved, - "found": True, + "type": "file", "resolved": resolved, "found": True, "matches": 1, - "files": [{ - "path": str(p), - "size_bytes": st.st_size, - "modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"), - }], + "files": [_file_info(str(p))], + "schema": schema, } except OSError as e: - return {"type": "file", "resolved": resolved, "found": False, "message": str(e)} + return {"type": "file", "resolved": resolved, "found": False, + "message": str(e), "schema": {}} diff --git a/app/core/app_factory.py b/app/core/app_factory.py index 6824afd..6e6303f 100644 --- a/app/core/app_factory.py +++ b/app/core/app_factory.py @@ -10,6 +10,7 @@ from app.api.configs import router as configs_router from app.core.settings import get_settings from app.views.auth import router as auth_router from app.views.views import router as dashboard_router +from app.views.config_views import router as config_views_router from app.views.docs import router as docs_router @@ -38,5 +39,6 @@ def create_app() -> FastAPI: app.include_router(configs_router) app.include_router(auth_router) app.include_router(dashboard_router) + app.include_router(config_views_router) app.include_router(docs_router) return app diff --git a/app/views/config_views.py b/app/views/config_views.py new file mode 100644 index 0000000..5f31891 --- /dev/null +++ b/app/views/config_views.py @@ -0,0 +1,68 @@ +"""Config list and editor views.""" +from pathlib import Path +from datetime import datetime + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates + +from app.core.refdata import ReconPatterns, ReconConfigStatus +from app.service.fake_configs import get_fake_configs + +router = APIRouter(prefix="", tags=["Config_UI"]) + +_project_root = Path(__file__).resolve().parents[2] +templates = Jinja2Templates(directory=str(_project_root / "data" / "templates")) + +_EDITOR_CONTEXT = { + "patterns": [p.value for p in ReconPatterns], + "statuses": [s.value for s in ReconConfigStatus], + "frequencies": ["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"], + "schema_types": ["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"], +} + + +@router.get("/configs", response_class=HTMLResponse) +async def configs_list_view(request: Request): + now = datetime.now() + configs = get_fake_configs(now) + return templates.TemplateResponse( + request=request, + name="configs_list.html", + context={ + "title": "Recon Configs", + "configs": configs, + }, + ) + + +@router.get("/configs/new", response_class=HTMLResponse) +async def config_new_view(request: Request): + return templates.TemplateResponse( + request=request, + name="config_editor.html", + context={ + "title": "New Config", + "config": None, + "is_new": True, + **_EDITOR_CONTEXT, + }, + ) + + +@router.get("/configs/{reference}/edit", response_class=HTMLResponse) +async def config_edit_view(request: Request, reference: str): + now = datetime.now() + configs = get_fake_configs(now) + config = next((c for c in configs if c.reference == reference), None) + return templates.TemplateResponse( + request=request, + name="config_editor.html", + context={ + "title": f"Edit — {reference}" if config else "Edit Config", + "config": config, + "reference": reference, + "is_new": False, + **_EDITOR_CONTEXT, + }, + ) diff --git a/app/views/views.py b/app/views/views.py index b205245..4af15b7 100644 --- a/app/views/views.py +++ b/app/views/views.py @@ -14,7 +14,6 @@ from fastapi.templating import Jinja2Templates from app.models.recon_job import ReconJob # noqa: F401 (validates that the real model imports cleanly) from app.service.fake_jobs import FakeReconJob, get_fake_jobs from app.service.fake_configs import FakeReconConfig, get_fake_configs -from app.core.refdata import ReconPatterns, ReconConfigStatus router = APIRouter(prefix="", tags=["User_Interface"]) @@ -507,58 +506,3 @@ async def job_detail_view(request: Request, job_id: int): }, ) - -# ── Config editor ──────────────────────────────────────────────────────────── - -_EDITOR_CONTEXT = { - "patterns": [p.value for p in ReconPatterns], - "statuses": [s.value for s in ReconConfigStatus], - "frequencies": ["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"], - "schema_types": ["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"], -} - - -@router.get("/configs/new", response_class=HTMLResponse) -async def config_new_view(request: Request): - return templates.TemplateResponse( - request=request, - name="config_editor.html", - context={ - "title": "New Config", - "config": None, - "is_new": True, - **_EDITOR_CONTEXT, - }, - ) - - -@router.get("/configs/{reference}/edit", response_class=HTMLResponse) -async def config_edit_view(request: Request, reference: str): - now = datetime.now() - configs = get_fake_configs(now) - config = next((c for c in configs if c.reference == reference), None) - return templates.TemplateResponse( - request=request, - name="config_editor.html", - context={ - "title": f"Edit — {reference}" if config else "Edit Config", - "config": config, - "reference": reference, - "is_new": False, - **_EDITOR_CONTEXT, - }, - ) - - -@router.get("/configs", response_class=HTMLResponse) -async def configs_list_view(request: Request): - now = datetime.now() - configs = get_fake_configs(now) - return templates.TemplateResponse( - request=request, - name="configs_list.html", - context={ - "title": "Recon Configs", - "configs": configs, - }, - ) diff --git a/data/input/20260526.csv b/data/input/20260526.csv new file mode 100644 index 0000000..d212c07 --- /dev/null +++ b/data/input/20260526.csv @@ -0,0 +1,6 @@ +account_id,customer_name,transaction_date,amount,currency,status,reference +ACC001,Alice Nguyen,2026-05-26,1500.00,NZD,MATCHED,REF-00001 +ACC002,Bob Tane,2026-05-26,820.50,NZD,UNMATCHED,REF-00002 +ACC003,Carol Smith,2026-05-26,3200.00,NZD,MATCHED,REF-00003 +ACC004,David Park,2026-05-26,415.75,NZD,PENDING,REF-00004 +ACC005,Eva Brown,2026-05-26,9900.00,NZD,MATCHED,REF-00005 diff --git a/data/static/css/styles.css b/data/static/css/styles.css index 87a456f..84f99c1 100644 --- a/data/static/css/styles.css +++ b/data/static/css/styles.css @@ -911,7 +911,8 @@ a.job-bar:hover { filter: brightness(1.1); box-shadow: 0 0 0 1px rgba(255,255,25 gap: 8px; align-items: stretch; } -.url-input { flex: 1; } +.url-input-row .url-scheme { flex-shrink: 0; width: auto; } +.url-input-row .url-path { flex: 1; width: 0; min-width: 0; } .url-test-result { margin-top: 6px; padding: 8px 10px; @@ -930,7 +931,7 @@ a.job-bar:hover { filter: brightness(1.1); box-shadow: 0 0 0 1px rgba(255,255,25 .schema-builder { display: flex; flex-direction: column; gap: 6px; } .kv-table-header { display: grid; - grid-template-columns: 1fr 1fr auto; + grid-template-columns: 1fr 1fr auto auto; gap: 8px; font-size: 10px; text-transform: uppercase; @@ -940,10 +941,17 @@ a.job-bar:hover { filter: brightness(1.1); box-shadow: 0 0 0 1px rgba(255,255,25 } .schema-row { display: grid; - grid-template-columns: 1fr 1fr auto; + grid-template-columns: 1fr 1fr auto auto; gap: 8px; align-items: center; } +.schema-row .idx-check { + width: 16px; + height: 16px; + accent-color: var(--accent); + cursor: pointer; + justify-self: center; +} .schema-row input, .schema-row select { border-radius: 5px; diff --git a/data/templates/config_editor.html b/data/templates/config_editor.html index 2d41941..350889e 100644 --- a/data/templates/config_editor.html +++ b/data/templates/config_editor.html @@ -176,21 +176,22 @@
- - + + +
- Supports: file:// mssql:// oracle:// snowflake:// duckdb://
Date templates: {% raw %}{{as_at_date.strftime('%Y%m%d')}} {{today.strftime('%Y%m%d')}}{% endraw %}
-
- - -
-
@@ -203,7 +204,7 @@ Schema 0 columns
- Column NameType + Column NameTypeIndex
@@ -263,8 +264,8 @@ Advanced
- - + +
@@ -313,7 +314,7 @@ function buildSyscfgCard(data, role, index) { } // Populate simple fields - const fields = ['name','url','day_offset','filter','sql','comment']; + const fields = ['name','day_offset','filter','sql','comment']; fields.forEach(f => { const el = card.querySelector(`[data-field="${f}"]`); if (!el) return; @@ -321,17 +322,31 @@ function buildSyscfgCard(data, role, index) { if (val !== undefined && val !== null) el.value = val; }); + // Split stored URL into scheme dropdown + path input + if (data.url) { + const m = data.url.match(/^([a-z]+:\/\/)([\s\S]*)$/i); + if (m) { + const schemeEl = card.querySelector('.url-scheme'); + const matchingOpt = Array.from(schemeEl.options).find(o => o.value === m[1].toLowerCase()); + if (matchingOpt) schemeEl.value = matchingOpt.value; + card.querySelector('.url-path').value = m[2]; + } else { + card.querySelector('.url-path').value = data.url; + } + } + // Populate list fields (comma-joined) - ['index_fields','obfuscate_fields','pci_redact_fields'].forEach(f => { + ['obfuscate_fields','pci_redact_fields'].forEach(f => { const el = card.querySelector(`[data-field="${f}"]`); if (el && Array.isArray(data[f])) el.value = data[f].join(', '); }); const fwEl = card.querySelector('[data-field="field_widths"]'); if (fwEl && Array.isArray(data.field_widths)) fwEl.value = data.field_widths.join(', '); - // Schema + // Schema — tick index columns derived from index_fields list const schemaData = data.system_schema || {}; - Object.entries(schemaData).forEach(([col, type]) => addSchemaRow(card, col, type)); + const indexSet = new Set(data.index_fields || []); + Object.entries(schemaData).forEach(([col, type]) => addSchemaRow(card, col, type, indexSet.has(col))); updateSchemaCount(card); // CSV spec @@ -353,11 +368,12 @@ function buildSyscfgCard(data, role, index) { } // URL change → show/hide specs, clear test result - const urlInput = card.querySelector('.url-input'); - urlInput.addEventListener('input', () => { + const onUrlChange = () => { syncSpecVisibility(card); card.querySelector('.url-test-result').setAttribute('hidden', ''); - }); + }; + card.querySelector('.url-scheme').addEventListener('change', onUrlChange); + card.querySelector('.url-path').addEventListener('input', onUrlChange); syncSpecVisibility(card); // Schema add button @@ -382,15 +398,16 @@ function setSpecCheckbox(card, spec, field, checked) { } function syncSpecVisibility(card) { - const url = (card.querySelector('.url-input').value || '').toLowerCase(); - const isFile = url.startsWith('file://'); - const isXml = isFile && /\.xml(\b|$|\?|#)/.test(url); + const scheme = card.querySelector('.url-scheme').value; + const path = (card.querySelector('.url-path').value || '').toLowerCase(); + const isFile = scheme === 'file://'; + const isXml = isFile && /\.xml(\b|$|\?|#)/.test(path); const isCsv = isFile && !isXml; card.querySelector('.syscfg-csv-spec').toggleAttribute('hidden', !isCsv); card.querySelector('.syscfg-xml-spec').toggleAttribute('hidden', !isXml); } -function addSchemaRow(card, colName, colType) { +function addSchemaRow(card, colName, colType, isIndex) { const container = card.querySelector('.schema-rows'); const row = document.createElement('div'); row.className = 'schema-row'; @@ -409,13 +426,19 @@ function addSchemaRow(card, colName, colType) { typeSelect.appendChild(opt); }); + const idxCheck = document.createElement('input'); + idxCheck.type = 'checkbox'; + idxCheck.className = 'idx-check'; + idxCheck.checked = !!isIndex; + idxCheck.title = 'Use as index / join key'; + const removeBtn = document.createElement('button'); removeBtn.type = 'button'; removeBtn.className = 'btn btn-secondary btn-sm'; removeBtn.textContent = '×'; removeBtn.addEventListener('click', () => { row.remove(); updateSchemaCount(card); }); - row.append(colInput, typeSelect, removeBtn); + row.append(colInput, typeSelect, idxCheck, removeBtn); container.appendChild(row); } @@ -442,16 +465,19 @@ function readSyscfgCard(card) { const toList = (val) => val.split(',').map(s => s.trim()).filter(Boolean); const toIntList = (val) => val.split(',').map(s => parseInt(s.trim(), 10)).filter(n => !isNaN(n)); - const url = g('url'); - const urlLower = url.toLowerCase(); - const isFile = urlLower.startsWith('file://'); - const isXml = isFile && /\.xml(\b|$)/.test(urlLower); + const scheme = card.querySelector('.url-scheme').value; + const url = scheme + (card.querySelector('.url-path').value || ''); + const isFile = scheme === 'file://'; + const isXml = isFile && /\.xml(\b|$)/.test(url.toLowerCase()); const schema = {}; + const index_fields = []; card.querySelectorAll('.schema-rows .schema-row').forEach(row => { - const name = row.querySelector('input').value.trim(); + const name = row.querySelector('input[type="text"]').value.trim(); const type = row.querySelector('select').value; - if (name) schema[name] = type; + if (!name) return; + schema[name] = type; + if (row.querySelector('.idx-check')?.checked) index_fields.push(name); }); const spec = (specName, fields) => { @@ -483,7 +509,7 @@ function readSyscfgCard(card) { system_schema: schema, filter: g('filter'), sql: g('sql'), - index_fields: toList(g('index_fields')), + index_fields, obfuscate_fields: toList(g('obfuscate_fields')), pci_redact_fields: toList(g('pci_redact_fields')), field_widths: toIntList(g('field_widths')), @@ -556,34 +582,39 @@ function collectPayload() { // ── Try URL ───────────────────────────────────────────────────────────────── async function testUrl(card) { - const url = card.querySelector('.url-input').value.trim(); + const url = card.querySelector('.url-scheme').value + card.querySelector('.url-path').value.trim(); const resultEl = card.querySelector('.url-test-result'); if (!url) { showToast('Enter a URL first.', 'warn'); return; } const btn = card.querySelector('.btn-test-url'); - btn.disabled = true; btn.textContent = 'Checking…'; + btn.disabled = true; btn.textContent = 'Inspecting…'; resultEl.removeAttribute('hidden'); resultEl.className = 'url-test-result url-test-loading'; - resultEl.textContent = 'Checking URL…'; + resultEl.textContent = 'Inspecting URL…'; + const csvSpec = { + delimiter: card.querySelector('[data-spec="csv"][data-field="delimiter"]')?.value || ',', + has_header: card.querySelector('[data-spec="csv"][data-field="header"]')?.checked !== false, + }; try { const resp = await fetch('/api/configs/test-url', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ url }), + body: JSON.stringify({ url, csv_spec: csvSpec }), }); const data = await resp.json(); - renderUrlTestResult(resultEl, data); + renderUrlTestResult(resultEl, data, card); } catch (err) { resultEl.className = 'url-test-result url-test-error'; resultEl.textContent = `Error: ${err.message}`; } finally { - btn.disabled = false; btn.textContent = 'Try URL'; + btn.disabled = false; btn.textContent = 'Inspect URL'; } } -function renderUrlTestResult(el, data) { - if (data.type === 'non-file') { - el.className = 'url-test-result url-test-info'; - el.textContent = data.message; +function renderUrlTestResult(el, data, card) { + el.textContent = ''; + + if (data.type === 'db') { + el.setAttribute('hidden', ''); return; } if (!data.found) { @@ -591,14 +622,59 @@ function renderUrlTestResult(el, data) { el.textContent = `Not found: ${data.message || data.resolved}`; return; } + el.className = 'url-test-result url-test-ok'; + + // File stats const lines = [`✓ ${data.matches} file${data.matches !== 1 ? 's' : ''} matched`]; (data.files || []).forEach(f => { if (f.error) { lines.push(` ⚠ ${f.path}: ${f.error}`); return; } const kb = (f.size_bytes / 1024).toFixed(1); lines.push(` ${f.path.split('/').pop()} ${kb} KB (modified ${f.modified})`); }); - el.textContent = lines.join('\n'); + const pre = document.createElement('span'); + pre.textContent = lines.join('\n'); + el.appendChild(pre); + + // Schema + const schema = data.schema || {}; + const colCount = Object.keys(schema).length; + if (colCount > 0) { + const schemaLine = document.createElement('div'); + schemaLine.style.cssText = 'margin-top:6px; display:flex; align-items:center; gap:10px; flex-wrap:wrap;'; + const label = document.createElement('span'); + label.textContent = ` ${colCount} column${colCount !== 1 ? 's' : ''} detected`; + const applyBtn = document.createElement('button'); + applyBtn.type = 'button'; + applyBtn.className = 'btn btn-primary btn-sm'; + applyBtn.textContent = 'Apply Schema'; + applyBtn.addEventListener('click', () => { + applySchema(card, schema); + applyBtn.textContent = '✓ Applied'; + applyBtn.disabled = true; + }); + schemaLine.append(label, applyBtn); + el.appendChild(schemaLine); + } +} + +function applySchema(card, schema) { + const container = card.querySelector('.schema-rows'); + // Preserve existing index ticks by column name before clearing + const existingIndex = new Set(); + container.querySelectorAll('.schema-row').forEach(row => { + if (row.querySelector('.idx-check')?.checked) + existingIndex.add(row.querySelector('input[type="text"]').value.trim()); + }); + container.innerHTML = ''; + Object.entries(schema).forEach(([col, type]) => + addSchemaRow(card, col, type, existingIndex.has(col)) + ); + updateSchemaCount(card); + // Open the schema section so the user sees it + card.querySelector('.syscfg-section details, details.syscfg-section')?.setAttribute('open', ''); + card.querySelector('.schema-builder')?.closest('details')?.setAttribute('open', ''); + showToast(`Schema applied — ${Object.keys(schema).length} columns.`, 'ok'); } // ── Toast ───────────────────────────────────────────────────────────────────