# ── Schema inference helpers ────────────────────────────────────────────────

_DATE_FORMATS = ["%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y%m%d", "%d-%m-%Y"]
_DATETIME_FORMATS = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"]

# Maximum number of non-empty values inspected per column.
_SAMPLE_SIZE = 30
# All-digit date layout; it has to be tested before the integer check (see _infer_type).
_COMPACT_DATE_FORMAT = "%Y%m%d"


def _log_schema_failure(kind: str, path: str, exc: Exception) -> None:
    """Emit a warning for a failed schema inference instead of failing silently."""
    import logging  # local import: keeps this helper independent of the file's import block

    logging.getLogger(__name__).warning(
        "%s schema inference failed for %s: %s", kind, path, exc
    )


def _all_numeric(sample: list[str], caster: type) -> bool:
    """Return True when every value converts with *caster* once commas are removed."""
    try:
        for value in sample:
            caster(value.replace(",", ""))
    except ValueError:
        return False
    return True


def _all_match_format(sample: list[str], fmt: str) -> bool:
    """Return True when every value parses with ``datetime.strptime(value, fmt)``."""
    try:
        for value in sample:
            datetime.strptime(value, fmt)
    except ValueError:
        return False
    return True


def _infer_type(values: list[str]) -> str:
    """Infer a schema type name from a column's string values.

    Only the first ``_SAMPLE_SIZE`` non-blank values (after stripping
    whitespace) are inspected, and a type is chosen only if *every* sampled
    value fits it.

    Args:
        values: Raw cell values for one column.

    Returns:
        One of ``"int"``, ``"float"``, ``"datetime('<fmt>')"``,
        ``"date('<fmt>')"`` or ``"str"``. ``"str"`` is returned when nothing
        else fits or when there are no non-blank values.
    """
    sample: list[str] = []
    for raw in values:
        text = raw.strip()
        if not text:
            continue
        sample.append(text)
        if len(sample) == _SAMPLE_SIZE:
            break
    if not sample:
        return "str"

    # Compact dates such as "20260526" are also valid integers, so they must
    # be recognised before the int check or the "%Y%m%d" entry of
    # _DATE_FORMATS could never be selected.
    looks_compact = all(len(v) == 8 and v.isascii() and v.isdigit() for v in sample)
    if looks_compact and _all_match_format(sample, _COMPACT_DATE_FORMAT):
        return f"date('{_COMPACT_DATE_FORMAT}')"

    if _all_numeric(sample, int):
        return "int"
    if _all_numeric(sample, float):
        return "float"

    # Datetime layouts are tried before date-only layouts; first match wins.
    for fmt in _DATETIME_FORMATS:
        if _all_match_format(sample, fmt):
            return f"datetime('{fmt}')"
    for fmt in _DATE_FORMATS:
        if _all_match_format(sample, fmt):
            return f"date('{fmt}')"
    return "str"


def _csv_schema(path: str, delimiter: str = ",", has_header: bool = True) -> dict[str, str]:
    """Infer ``{column name: type name}`` from the first rows of a CSV file.

    At most ``_SAMPLE_SIZE + 1`` non-blank rows are read. Inference is
    best-effort: any failure is logged and an empty dict is returned.

    Args:
        path: Path of the CSV file to inspect.
        delimiter: Field delimiter handed to ``csv.reader``.
        has_header: When True the first non-blank row supplies the column
            names; otherwise names are generated as ``col_0``, ``col_1``, ...

    Returns:
        Mapping of column name to the result of ``_infer_type``. Columns whose
        header is blank are omitted. Empty dict if the file has no rows or
        could not be read.
    """
    try:
        # "utf-8-sig" drops a leading byte-order mark so it cannot end up
        # inside the first header name; BOM-less files decode as plain UTF-8.
        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
            rows: list[list[str]] = []
            for row in csv.reader(f, delimiter=delimiter):
                if not row:
                    # csv.reader yields [] for blank lines; they carry no data
                    # and must not be mistaken for the header row.
                    continue
                rows.append(row)
                if len(rows) > _SAMPLE_SIZE:
                    break
        if not rows:
            return {}
        if has_header:
            headers, data_rows = rows[0], rows[1:]
        else:
            headers = [f"col_{i}" for i in range(len(rows[0]))]
            data_rows = rows
        return {
            col: _infer_type([r[i] for r in data_rows if i < len(r)])  # tolerate short rows
            for i, col in enumerate(headers)
            if col.strip()
        }
    except Exception as exc:  # best-effort preview: report and fall back to "no schema"
        _log_schema_failure("CSV", path, exc)
        return {}


def _xml_schema(path: str, xpathstr: str = "./*") -> dict[str, str]:
    """Derive a ``{field name: "str"}`` schema from the first record of an XML file.

    Records are the elements matched by ``xpathstr`` relative to the document
    root; if nothing matches, the root's direct children are used instead.
    Only the first record is inspected. Its child tags become field names and
    its attributes become ``@``-prefixed field names; every field is typed
    ``"str"``.

    Args:
        path: Path of the XML file to inspect.
        xpathstr: ElementTree path expression selecting the record elements.

    Returns:
        The inferred mapping, or an empty dict when there are no records or
        the file cannot be read or parsed (the failure is logged).
    """
    try:
        # NOTE(review): xml.etree.ElementTree is documented as not secure
        # against maliciously constructed data; confirm that only trusted
        # files can reach this call.
        root = ET.parse(path).getroot()
        elements = root.findall(xpathstr) or list(root)
        if not elements:
            return {}
        first = elements[0]
        schema: dict[str, str] = {child.tag: "str" for child in first}
        schema.update({f"@{attr}": "str" for attr in first.attrib})
        return schema
    except Exception as exc:  # best-effort preview: report and fall back to "no schema"
        _log_schema_failure("XML", path, exc)
        return {}
Database connections are validated at run time."} + scheme = url.split("://")[0] if "://" in url else url + return {"type": "db", "scheme": scheme, "schema": {}, "found": False} - # Resolve the path part (strip file://) + # ── file:// URL ────────────────────────────────────────────────────────── raw_path = url[7:] - - # Replace simple template variables for preview purposes - from datetime import date as date_type try: as_at = datetime.strptime(as_at_date, "%Y%m%d").date() except ValueError: @@ -183,40 +258,49 @@ async def test_url(request: Request): .replace("{{today.strftime('%Y-%m-%d')}}", date_type.today().strftime("%Y-%m-%d")) ) - # Treat as glob pattern for anything still containing { + def _file_info(p: str) -> dict: + try: + st = os.stat(p) + return {"path": p, "size_bytes": st.st_size, + "modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds")} + except OSError: + return {"path": p, "error": "Could not stat file"} + + def _schema_for(p: str) -> dict[str, str]: + pl = p.lower() + if pl.endswith(".xml"): + return _xml_schema(p) + delimiter = csv_spec.get("delimiter", ",") or "," + has_header = csv_spec.get("has_header", True) + return _csv_schema(p, delimiter=delimiter, has_header=has_header) + + # Glob pattern if "*" in resolved or "?" 
in resolved or "{" in resolved: - matches = glob.glob(resolved) + matches = sorted(glob.glob(resolved)) if not matches: - return {"type": "file", "resolved": resolved, "found": False, "message": "No files matched the pattern."} - file_infos = [] - for p in sorted(matches)[:10]: - try: - st = os.stat(p) - file_infos.append({ - "path": p, - "size_bytes": st.st_size, - "modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"), - }) - except OSError: - file_infos.append({"path": p, "error": "Could not stat file"}) - return {"type": "file", "resolved": resolved, "found": True, "matches": len(matches), "files": file_infos} + return {"type": "file", "resolved": resolved, "found": False, + "message": "No files matched the pattern.", "schema": {}} + schema = _schema_for(matches[0]) + return { + "type": "file", "resolved": resolved, "found": True, + "matches": len(matches), + "files": [_file_info(p) for p in matches[:10]], + "schema": schema, + } # Exact path p = Path(resolved) if not p.exists(): - return {"type": "file", "resolved": resolved, "found": False, "message": f"File not found: {resolved}"} + return {"type": "file", "resolved": resolved, "found": False, + "message": f"File not found: {resolved}", "schema": {}} try: - st = p.stat() + schema = _schema_for(str(p)) return { - "type": "file", - "resolved": resolved, - "found": True, + "type": "file", "resolved": resolved, "found": True, "matches": 1, - "files": [{ - "path": str(p), - "size_bytes": st.st_size, - "modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"), - }], + "files": [_file_info(str(p))], + "schema": schema, } except OSError as e: - return {"type": "file", "resolved": resolved, "found": False, "message": str(e)} + return {"type": "file", "resolved": resolved, "found": False, + "message": str(e), "schema": {}} diff --git a/app/core/app_factory.py b/app/core/app_factory.py index 6824afd..6e6303f 100644 --- a/app/core/app_factory.py +++ b/app/core/app_factory.py @@ 
"""Config list and editor views."""
from pathlib import Path
from datetime import datetime

from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

from app.core.refdata import ReconPatterns, ReconConfigStatus
from app.service.fake_configs import get_fake_configs

router = APIRouter(prefix="", tags=["Config_UI"])

# Templates live under <project root>/data/templates, two levels above this package.
_project_root = Path(__file__).resolve().parents[2]
templates = Jinja2Templates(directory=str(_project_root / "data" / "templates"))

# Option lists shared by both editor pages (new and edit).
_EDITOR_CONTEXT = {
    "patterns": [pattern.value for pattern in ReconPatterns],
    "statuses": [status.value for status in ReconConfigStatus],
    "frequencies": ["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"],
    "schema_types": ["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"],
}


def _render_editor(request: Request, page_context: dict):
    """Render ``config_editor.html`` with *page_context* plus the shared option lists."""
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context={**page_context, **_EDITOR_CONTEXT},
    )


@router.get("/configs", response_class=HTMLResponse)
async def configs_list_view(request: Request):
    """Render the list page with the configs returned by ``get_fake_configs``."""
    page_context = {
        "title": "Recon Configs",
        "configs": get_fake_configs(datetime.now()),
    }
    return templates.TemplateResponse(
        request=request,
        name="configs_list.html",
        context=page_context,
    )


@router.get("/configs/new", response_class=HTMLResponse)
async def config_new_view(request: Request):
    """Render the editor page with no config loaded (``is_new`` is True)."""
    return _render_editor(
        request,
        {"title": "New Config", "config": None, "is_new": True},
    )


@router.get("/configs/{reference}/edit", response_class=HTMLResponse)
async def config_edit_view(request: Request, reference: str):
    """Render the editor page for the config whose ``reference`` matches the path.

    When no config matches, the page is still rendered, with ``config`` set to
    None and a generic title.
    """
    selected = None
    for candidate in get_fake_configs(datetime.now()):
        if candidate.reference == reference:
            selected = candidate
            break

    if selected:
        page_title = f"Edit — {reference}"
    else:
        page_title = "Edit Config"

    return _render_editor(
        request,
        {
            "title": page_title,
            "config": selected,
            "reference": reference,
            "is_new": False,
        },
    )
Config", - "config": None, - "is_new": True, - **_EDITOR_CONTEXT, - }, - ) - - -@router.get("/configs/{reference}/edit", response_class=HTMLResponse) -async def config_edit_view(request: Request, reference: str): - now = datetime.now() - configs = get_fake_configs(now) - config = next((c for c in configs if c.reference == reference), None) - return templates.TemplateResponse( - request=request, - name="config_editor.html", - context={ - "title": f"Edit — {reference}" if config else "Edit Config", - "config": config, - "reference": reference, - "is_new": False, - **_EDITOR_CONTEXT, - }, - ) - - -@router.get("/configs", response_class=HTMLResponse) -async def configs_list_view(request: Request): - now = datetime.now() - configs = get_fake_configs(now) - return templates.TemplateResponse( - request=request, - name="configs_list.html", - context={ - "title": "Recon Configs", - "configs": configs, - }, - ) diff --git a/data/input/20260526.csv b/data/input/20260526.csv new file mode 100644 index 0000000..d212c07 --- /dev/null +++ b/data/input/20260526.csv @@ -0,0 +1,6 @@ +account_id,customer_name,transaction_date,amount,currency,status,reference +ACC001,Alice Nguyen,2026-05-26,1500.00,NZD,MATCHED,REF-00001 +ACC002,Bob Tane,2026-05-26,820.50,NZD,UNMATCHED,REF-00002 +ACC003,Carol Smith,2026-05-26,3200.00,NZD,MATCHED,REF-00003 +ACC004,David Park,2026-05-26,415.75,NZD,PENDING,REF-00004 +ACC005,Eva Brown,2026-05-26,9900.00,NZD,MATCHED,REF-00005 diff --git a/data/static/css/styles.css b/data/static/css/styles.css index 87a456f..84f99c1 100644 --- a/data/static/css/styles.css +++ b/data/static/css/styles.css @@ -911,7 +911,8 @@ a.job-bar:hover { filter: brightness(1.1); box-shadow: 0 0 0 1px rgba(255,255,25 gap: 8px; align-items: stretch; } -.url-input { flex: 1; } +.url-input-row .url-scheme { flex-shrink: 0; width: auto; } +.url-input-row .url-path { flex: 1; width: 0; min-width: 0; } .url-test-result { margin-top: 6px; padding: 8px 10px; @@ -930,7 +931,7 @@ a.job-bar:hover { 
filter: brightness(1.1); box-shadow: 0 0 0 1px rgba(255,255,25 .schema-builder { display: flex; flex-direction: column; gap: 6px; } .kv-table-header { display: grid; - grid-template-columns: 1fr 1fr auto; + grid-template-columns: 1fr 1fr auto auto; gap: 8px; font-size: 10px; text-transform: uppercase; @@ -940,10 +941,17 @@ a.job-bar:hover { filter: brightness(1.1); box-shadow: 0 0 0 1px rgba(255,255,25 } .schema-row { display: grid; - grid-template-columns: 1fr 1fr auto; + grid-template-columns: 1fr 1fr auto auto; gap: 8px; align-items: center; } +.schema-row .idx-check { + width: 16px; + height: 16px; + accent-color: var(--accent); + cursor: pointer; + justify-self: center; +} .schema-row input, .schema-row select { border-radius: 5px; diff --git a/data/templates/config_editor.html b/data/templates/config_editor.html index 2d41941..350889e 100644 --- a/data/templates/config_editor.html +++ b/data/templates/config_editor.html @@ -176,21 +176,22 @@
file:// mssql:// oracle:// snowflake:// duckdb://{{as_at_date.strftime('%Y%m%d')}} {{today.strftime('%Y%m%d')}}{% endraw %}