Auto schemas

This commit is contained in:
2026-05-26 22:34:28 +12:00
parent 35d70a7746
commit d4969172c2
7 changed files with 320 additions and 132 deletions
+116 -32
View File
@@ -9,8 +9,10 @@ returns metadata (no file contents are returned).
"""
from __future__ import annotations
import csv
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path
from typing import Any
@@ -153,23 +155,96 @@ async def update_config(reference: str, request: Request):
)
# ── Schema inference helpers ────────────────────────────────────────────────
# Candidate strptime patterns, tried in order; the first one that parses every
# sampled value wins.
_DATE_FORMATS = ["%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y%m%d", "%d-%m-%Y"]
_DATETIME_FORMATS = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"]


def _infer_type(values: list[str]) -> str:
    """Guess a schema type name for a column from a sample of its raw strings.

    Blank values are ignored and at most the first 30 non-blank values
    (whitespace-stripped) are inspected.  A type is chosen only when *every*
    sampled value parses as that type.

    Args:
        values: Raw cell strings for one column.

    Returns:
        One of ``"int"``, ``"float"``, ``"datetime('<fmt>')"``,
        ``"date('<fmt>')"`` or ``"str"``.  ``"str"`` is returned when there is
        nothing to sample or no other type fits.
    """
    sample = [v.strip() for v in values if v.strip()][:30]
    if not sample:
        return "str"

    def _all_parse(parse) -> bool:
        # True when ``parse`` accepts every sampled value without ValueError.
        try:
            for item in sample:
                parse(item)
        except ValueError:
            return False
        return True

    # Compact dates such as "20240115" are pure digits, so they must be tested
    # before the numeric checks; otherwise int() always claims them and the
    # "%Y%m%d" entry in _DATE_FORMATS can never be reached.  The explicit
    # 8-ASCII-digit guard is needed because strptime is lenient about field
    # widths and would also accept shorter digit strings.
    compact_fmt = "%Y%m%d"
    if (
        compact_fmt in _DATE_FORMATS
        and all(len(v) == 8 and v.isascii() and v.isdigit() for v in sample)
        and _all_parse(lambda v: datetime.strptime(v, compact_fmt))
    ):
        return f"date('{compact_fmt}')"

    # Commas are dropped so thousands-separated numbers ("1,234") still parse.
    if _all_parse(lambda v: int(v.replace(",", ""))):
        return "int"
    if _all_parse(lambda v: float(v.replace(",", ""))):
        return "float"

    for fmt in _DATETIME_FORMATS:
        if _all_parse(lambda v, fmt=fmt: datetime.strptime(v, fmt)):
            return f"datetime('{fmt}')"
    for fmt in _DATE_FORMATS:
        if _all_parse(lambda v, fmt=fmt: datetime.strptime(v, fmt)):
            return f"date('{fmt}')"
    return "str"
def _csv_schema(path: str, delimiter: str = ",", has_header: bool = True) -> dict[str, str]:
    """Infer a ``{column name: type name}`` mapping from the head of a delimited file.

    Only the first 31 rows are read (one header row plus up to 30 data rows
    when ``has_header`` is true).  Column types come from ``_infer_type``.

    Args:
        path: Filesystem path of the file to inspect.
        delimiter: Field separator passed to ``csv.reader``.
        has_header: When true the first row supplies column names; otherwise
            names are generated as ``col_0``, ``col_1``, ...

    Returns:
        The inferred mapping, skipping columns whose header is blank.  An
        empty dict is returned when the file is empty or when anything goes
        wrong while reading it.
    """
    try:
        # "utf-8-sig" behaves like "utf-8" but drops a leading byte-order mark,
        # so a BOM written by spreadsheet exports does not end up glued to the
        # first column name.
        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
            reader = csv.reader(f, delimiter=delimiter)
            # zip() against a bounded range stops reading after 31 rows.
            rows = [row for _, row in zip(range(31), reader)]
        if not rows:
            return {}
        if has_header:
            headers = rows[0]
            data_rows = rows[1:]
        else:
            headers = [f"col_{i}" for i in range(len(rows[0]))]
            data_rows = rows
        return {
            # Short (ragged) rows simply contribute no value for this column.
            col: _infer_type([row[i] for row in data_rows if i < len(row)])
            for i, col in enumerate(headers)
            if col.strip()
        }
    except Exception:
        # Deliberate best-effort: schema preview must never fail the caller,
        # so any read/parse problem degrades to "no schema".
        return {}
def _xml_schema(path: str, xpathstr: str = "./*", max_records: int = 30) -> dict[str, str]:
    """Infer a ``{field name: "str"}`` mapping from record elements of an XML file.

    Record elements are those matched by ``xpathstr`` relative to the document
    root; when nothing matches, the root's direct children are used instead.
    Child element tags become field names as-is and attribute names are
    prefixed with ``@``.  Every field is typed ``"str"``.

    Up to ``max_records`` elements that share the first record's tag are
    inspected, so optional children/attributes missing from the first record
    are still picked up.  Fields keep first-seen order.

    Args:
        path: Filesystem path of the XML file.
        xpathstr: ElementTree path expression selecting the record elements.
        max_records: Maximum number of records to sample (at least one record
            is always sampled).

    Returns:
        The inferred mapping, or an empty dict when there are no candidate
        elements or the file cannot be read or parsed.
    """
    try:
        # NOTE(security): xml.etree.ElementTree is not hardened against
        # maliciously constructed XML (e.g. entity-expansion attacks); only
        # point this at trusted files.
        root = ET.parse(path).getroot()
        elements = root.findall(xpathstr) or list(root)
        if not elements:
            return {}

        record_tag = elements[0].tag
        budget = max(1, max_records)
        schema: dict[str, str] = {}
        for el in elements:
            # Only merge elements of the same kind as the first record so a
            # heterogeneous match (e.g. header + rows) does not mix fields.
            if el.tag != record_tag:
                continue
            if budget == 0:
                break
            budget -= 1
            for child in el:
                schema.setdefault(child.tag, "str")
            for attr in el.attrib:
                schema.setdefault(f"@{attr}", "str")
        return schema
    except Exception:
        # Deliberate best-effort: unreadable or malformed input (or a bad path
        # expression) degrades to "no schema" rather than failing the caller.
        return {}
# ── Test URL ────────────────────────────────────────────────────────────────
@router.post("/test-url")
async def test_url(request: Request):
"""Return metadata for a URL pattern — no file contents ever returned."""
"""Return file/DB metadata and inferred schema — no row data ever returned."""
from datetime import date as date_type
body = await request.json()
url: str = body.get("url", "")
as_at_date: str = body.get("as_at_date", datetime.now().strftime("%Y%m%d"))
csv_spec: dict = body.get("csv_spec", {})
# ── DB URL — schema inspection wired up per-connector when available ──────
if not url.startswith("file://"):
return {"type": "non-file", "message": "Only file:// URLs can be tested here. Database connections are validated at run time."}
scheme = url.split("://")[0] if "://" in url else url
return {"type": "db", "scheme": scheme, "schema": {}, "found": False}
# Resolve the path part (strip file://)
# ── file:// URL ──────────────────────────────────────────────────────────
raw_path = url[7:]
# Replace simple template variables for preview purposes
from datetime import date as date_type
try:
as_at = datetime.strptime(as_at_date, "%Y%m%d").date()
except ValueError:
@@ -183,40 +258,49 @@ async def test_url(request: Request):
.replace("{{today.strftime('%Y-%m-%d')}}", date_type.today().strftime("%Y-%m-%d"))
)
# Treat as glob pattern for anything still containing {
def _file_info(p: str) -> dict:
try:
st = os.stat(p)
return {"path": p, "size_bytes": st.st_size,
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds")}
except OSError:
return {"path": p, "error": "Could not stat file"}
def _schema_for(p: str) -> dict[str, str]:
pl = p.lower()
if pl.endswith(".xml"):
return _xml_schema(p)
delimiter = csv_spec.get("delimiter", ",") or ","
has_header = csv_spec.get("has_header", True)
return _csv_schema(p, delimiter=delimiter, has_header=has_header)
# Glob pattern
if "*" in resolved or "?" in resolved or "{" in resolved:
matches = glob.glob(resolved)
matches = sorted(glob.glob(resolved))
if not matches:
return {"type": "file", "resolved": resolved, "found": False, "message": "No files matched the pattern."}
file_infos = []
for p in sorted(matches)[:10]:
try:
st = os.stat(p)
file_infos.append({
"path": p,
"size_bytes": st.st_size,
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
})
except OSError:
file_infos.append({"path": p, "error": "Could not stat file"})
return {"type": "file", "resolved": resolved, "found": True, "matches": len(matches), "files": file_infos}
return {"type": "file", "resolved": resolved, "found": False,
"message": "No files matched the pattern.", "schema": {}}
schema = _schema_for(matches[0])
return {
"type": "file", "resolved": resolved, "found": True,
"matches": len(matches),
"files": [_file_info(p) for p in matches[:10]],
"schema": schema,
}
# Exact path
p = Path(resolved)
if not p.exists():
return {"type": "file", "resolved": resolved, "found": False, "message": f"File not found: {resolved}"}
return {"type": "file", "resolved": resolved, "found": False,
"message": f"File not found: {resolved}", "schema": {}}
try:
st = p.stat()
schema = _schema_for(str(p))
return {
"type": "file",
"resolved": resolved,
"found": True,
"type": "file", "resolved": resolved, "found": True,
"matches": 1,
"files": [{
"path": str(p),
"size_bytes": st.st_size,
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
}],
"files": [_file_info(str(p))],
"schema": schema,
}
except OSError as e:
return {"type": "file", "resolved": resolved, "found": False, "message": str(e)}
return {"type": "file", "resolved": resolved, "found": False,
"message": str(e), "schema": {}}
+2
View File
@@ -10,6 +10,7 @@ from app.api.configs import router as configs_router
from app.core.settings import get_settings
from app.views.auth import router as auth_router
from app.views.views import router as dashboard_router
from app.views.config_views import router as config_views_router
from app.views.docs import router as docs_router
@@ -38,5 +39,6 @@ def create_app() -> FastAPI:
app.include_router(configs_router)
app.include_router(auth_router)
app.include_router(dashboard_router)
app.include_router(config_views_router)
app.include_router(docs_router)
return app
+68
View File
@@ -0,0 +1,68 @@
"""Config list and editor views."""
from pathlib import Path
from datetime import datetime
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from app.core.refdata import ReconPatterns, ReconConfigStatus
from app.service.fake_configs import get_fake_configs
# Router carrying the config list/editor pages; registered with an empty prefix.
router = APIRouter(prefix="", tags=["Config_UI"])

# parents[2] of this file's resolved path; presumably the project root
# (confirm against the repository layout).
_project_root = Path(__file__).resolve().parents[2]
templates = Jinja2Templates(directory=str(_project_root.joinpath("data", "templates")))

# Option lists merged into the template context of both editor views.
_EDITOR_CONTEXT = {
    "patterns": [pattern.value for pattern in ReconPatterns],
    "statuses": [status.value for status in ReconConfigStatus],
    "frequencies": ["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"],
    "schema_types": ["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"],
}
@router.get("/configs", response_class=HTMLResponse)
async def configs_list_view(request: Request):
    """Render ``configs_list.html`` with the configs from ``get_fake_configs``."""
    page_context = {
        "title": "Recon Configs",
        "configs": get_fake_configs(datetime.now()),
    }
    return templates.TemplateResponse(
        request=request,
        name="configs_list.html",
        context=page_context,
    )
@router.get("/configs/new", response_class=HTMLResponse)
async def config_new_view(request: Request):
    """Render ``config_editor.html`` in "new config" mode (no existing config)."""
    page_context = {"title": "New Config", "config": None, "is_new": True}
    # Editor option lists are applied last, matching the original merge order.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
@router.get("/configs/{reference}/edit", response_class=HTMLResponse)
async def config_edit_view(request: Request, reference: str):
    """Render ``config_editor.html`` for the config whose ``reference`` matches.

    When no config matches, the editor is still rendered with ``config`` set
    to ``None`` and a generic title.
    """
    matched = None
    for candidate in get_fake_configs(datetime.now()):
        if candidate.reference == reference:
            matched = candidate
            break

    if matched:
        heading = f"Edit — {reference}"
    else:
        heading = "Edit Config"

    page_context = {
        "title": heading,
        "config": matched,
        "reference": reference,
        "is_new": False,
    }
    # Editor option lists are applied last, matching the original merge order.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
-56
View File
@@ -14,7 +14,6 @@ from fastapi.templating import Jinja2Templates
from app.models.recon_job import ReconJob # noqa: F401 (validates that the real model imports cleanly)
from app.service.fake_jobs import FakeReconJob, get_fake_jobs
from app.service.fake_configs import FakeReconConfig, get_fake_configs
from app.core.refdata import ReconPatterns, ReconConfigStatus
router = APIRouter(prefix="", tags=["User_Interface"])
@@ -507,58 +506,3 @@ async def job_detail_view(request: Request, job_id: int):
},
)
# ── Config editor ────────────────────────────────────────────────────────────
# Option lists merged into the template context of both editor views.
_EDITOR_CONTEXT = {
    "patterns": [pattern.value for pattern in ReconPatterns],
    "statuses": [status.value for status in ReconConfigStatus],
    "frequencies": ["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"],
    "schema_types": ["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"],
}
@router.get("/configs/new", response_class=HTMLResponse)
async def config_new_view(request: Request):
    """Render ``config_editor.html`` in "new config" mode (no existing config)."""
    page_context = {"title": "New Config", "config": None, "is_new": True}
    # Editor option lists are applied last, matching the original merge order.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
@router.get("/configs/{reference}/edit", response_class=HTMLResponse)
async def config_edit_view(request: Request, reference: str):
    """Render ``config_editor.html`` for the config whose ``reference`` matches.

    When no config matches, the editor is still rendered with ``config`` set
    to ``None`` and a generic title.
    """
    matched = None
    for candidate in get_fake_configs(datetime.now()):
        if candidate.reference == reference:
            matched = candidate
            break

    if matched:
        heading = f"Edit — {reference}"
    else:
        heading = "Edit Config"

    page_context = {
        "title": heading,
        "config": matched,
        "reference": reference,
        "is_new": False,
    }
    # Editor option lists are applied last, matching the original merge order.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
@router.get("/configs", response_class=HTMLResponse)
async def configs_list_view(request: Request):
    """Render ``configs_list.html`` with the configs from ``get_fake_configs``."""
    page_context = {
        "title": "Recon Configs",
        "configs": get_fake_configs(datetime.now()),
    }
    return templates.TemplateResponse(
        request=request,
        name="configs_list.html",
        context=page_context,
    )