Auto schemas
This commit is contained in:
+116
-32
@@ -9,8 +9,10 @@ returns metadata (no file contents are returned).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import os
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
@@ -153,23 +155,96 @@ async def update_config(reference: str, request: Request):
|
||||
)
|
||||
|
||||
|
||||
# ── Schema inference helpers ────────────────────────────────────────────────
|
||||
|
||||
_DATE_FORMATS = ["%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y%m%d", "%d-%m-%Y"]
_DATETIME_FORMATS = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"]

# Compact dates are made of digits only, so they also satisfy ``int()``.
# They must be recognised before the integer probe or they are never reported.
_COMPACT_DATE_FORMAT = "%Y%m%d"


def _all_convert(sample: list[str], convert) -> bool:
    """Return True when *convert* accepts every value in *sample*.

    A ``ValueError`` from any single value means the whole sample is rejected.
    """
    try:
        for value in sample:
            convert(value)
    except ValueError:
        return False
    return True


def _infer_type(values: list[str]) -> str:
    """Guess a schema type label for a column from a sample of its string values.

    Blank values are ignored and at most the first 30 non-blank values are
    examined. A candidate type is accepted only if *every* sampled value
    parses as that type. Candidates are tried in this order:

    1. ``date('%Y%m%d')`` - only when every value is exactly eight ASCII digits
       and a valid calendar date (otherwise such columns would always be
       reported as ``int`` and the format could never be detected);
    2. ``int`` (commas are removed before parsing);
    3. ``float`` (commas are removed before parsing);
    4. ``datetime('<fmt>')`` for the first matching entry of ``_DATETIME_FORMATS``;
    5. ``date('<fmt>')`` for the first matching entry of ``_DATE_FORMATS``.

    Returns:
        The type label, or ``"str"`` when the sample is empty or nothing matches.
    """
    sample = [v.strip() for v in values if v.strip()][:30]
    if not sample:
        return "str"

    if (
        _COMPACT_DATE_FORMAT in _DATE_FORMATS
        and all(len(v) == 8 and v.isascii() and v.isdigit() for v in sample)
        and _all_convert(sample, lambda v: datetime.strptime(v, _COMPACT_DATE_FORMAT))
    ):
        return f"date('{_COMPACT_DATE_FORMAT}')"

    if _all_convert(sample, lambda v: int(v.replace(",", ""))):
        return "int"
    if _all_convert(sample, lambda v: float(v.replace(",", ""))):
        return "float"

    # Datetime formats are tried before plain date formats, first match wins.
    for label, formats in (("datetime", _DATETIME_FORMATS), ("date", _DATE_FORMATS)):
        for fmt in formats:
            if _all_convert(sample, lambda v, fmt=fmt: datetime.strptime(v, fmt)):
                return f"{label}('{fmt}')"
    return "str"
|
||||
|
||||
|
||||
def _csv_schema(path: str, delimiter: str = ",", has_header: bool = True) -> dict[str, str]:
    """Infer a ``{column name: type label}`` mapping from the start of a CSV file.

    At most the first 31 rows are read. With ``has_header`` true, the first row
    supplies the column names and the rest are data. Otherwise the names are
    ``col_0``, ``col_1``, ... sized from the first row, and every row is data.
    Columns whose header is blank are omitted. Each column's type comes from
    ``_infer_type``.

    Args:
        path: Filesystem path of the CSV file.
        delimiter: Field separator handed to ``csv.reader``.
        has_header: Whether the first row holds column names.

    Returns:
        The inferred schema, or ``{}`` when the file has no rows or anything
        goes wrong (schema inference is best-effort and never raises).
    """
    try:
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte-order
        # mark, which would otherwise be glued to the first header name (or to
        # the first value, breaking type inference for header-less files).
        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
            reader = csv.reader(f, delimiter=delimiter)
            # zip() against a bounded range stops reading after 31 rows.
            rows = [row for _, row in zip(range(31), reader)]
        if not rows:
            return {}
        if has_header:
            headers, data_rows = rows[0], rows[1:]
        else:
            headers = [f"col_{i}" for i in range(len(rows[0]))]
            data_rows = rows
        return {
            # Short rows simply contribute nothing to the columns they lack.
            col: _infer_type([r[i] for r in data_rows if i < len(r)])
            for i, col in enumerate(headers)
            if col.strip()
        }
    except Exception:
        # Deliberate best-effort: unreadable or malformed input yields no schema.
        return {}
|
||||
|
||||
|
||||
def _xml_schema(path: str, xpathstr: str = "./*") -> dict[str, str]:
    """Describe the fields of the first record element in an XML file.

    Record elements are those matched by ``xpathstr`` relative to the document
    root; if nothing matches, the root's direct children are used instead.
    Only the first record is inspected: each child tag becomes a key, and each
    attribute becomes a key prefixed with ``@``. Every field is typed ``"str"``.

    Returns:
        The field mapping, or ``{}`` when there are no record elements or the
        file cannot be parsed (any exception is swallowed).
    """
    try:
        root = ET.parse(path).getroot()
        candidates = root.findall(xpathstr)
        if not candidates:
            candidates = list(root)
        if not candidates:
            return {}
        first = candidates[0]
        # Child tags first, then attributes, matching insertion order of keys.
        fields: dict[str, str] = {child.tag: "str" for child in first}
        fields.update((f"@{name}", "str") for name in first.attrib)
        return fields
    except Exception:
        return {}
|
||||
|
||||
|
||||
# ── Test URL ────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.post("/test-url")
|
||||
async def test_url(request: Request):
|
||||
"""Return metadata for a URL pattern — no file contents ever returned."""
|
||||
"""Return file/DB metadata and inferred schema — no row data ever returned."""
|
||||
from datetime import date as date_type
|
||||
|
||||
body = await request.json()
|
||||
url: str = body.get("url", "")
|
||||
as_at_date: str = body.get("as_at_date", datetime.now().strftime("%Y%m%d"))
|
||||
csv_spec: dict = body.get("csv_spec", {})
|
||||
|
||||
# ── DB URL — schema inspection wired up per-connector when available ──────
|
||||
if not url.startswith("file://"):
|
||||
return {"type": "non-file", "message": "Only file:// URLs can be tested here. Database connections are validated at run time."}
|
||||
scheme = url.split("://")[0] if "://" in url else url
|
||||
return {"type": "db", "scheme": scheme, "schema": {}, "found": False}
|
||||
|
||||
# Resolve the path part (strip file://)
|
||||
# ── file:// URL ──────────────────────────────────────────────────────────
|
||||
raw_path = url[7:]
|
||||
|
||||
# Replace simple template variables for preview purposes
|
||||
from datetime import date as date_type
|
||||
try:
|
||||
as_at = datetime.strptime(as_at_date, "%Y%m%d").date()
|
||||
except ValueError:
|
||||
@@ -183,40 +258,49 @@ async def test_url(request: Request):
|
||||
.replace("{{today.strftime('%Y-%m-%d')}}", date_type.today().strftime("%Y-%m-%d"))
|
||||
)
|
||||
|
||||
# Treat as glob pattern for anything still containing {
|
||||
def _file_info(p: str) -> dict:
    """Return size and modification-time metadata for the file at ``p``.

    On success the result holds ``path``, ``size_bytes`` and ``modified`` (an
    ISO-8601 timestamp with second precision). If the file cannot be stat'ed,
    the result holds ``path`` and an ``error`` message instead.
    """
    try:
        stat_result = os.stat(p)
        modified_at = datetime.fromtimestamp(stat_result.st_mtime)
        info = {"path": p, "size_bytes": stat_result.st_size}
        info["modified"] = modified_at.isoformat(timespec="seconds")
        return info
    except OSError:
        return {"path": p, "error": "Could not stat file"}
|
||||
|
||||
def _schema_for(p: str) -> dict[str, str]:
    """Infer a schema for the file at ``p`` based on its extension.

    Paths ending in ``.xml`` (case-insensitive) go to ``_xml_schema``; every
    other path is treated as CSV, using the ``delimiter`` (falling back to a
    comma when missing or empty) and ``has_header`` (default true) entries of
    the enclosing ``csv_spec``.
    """
    if not p.lower().endswith(".xml"):
        return _csv_schema(
            p,
            delimiter=csv_spec.get("delimiter", ",") or ",",
            has_header=csv_spec.get("has_header", True),
        )
    return _xml_schema(p)
|
||||
|
||||
# Glob pattern
|
||||
if "*" in resolved or "?" in resolved or "{" in resolved:
|
||||
matches = glob.glob(resolved)
|
||||
matches = sorted(glob.glob(resolved))
|
||||
if not matches:
|
||||
return {"type": "file", "resolved": resolved, "found": False, "message": "No files matched the pattern."}
|
||||
file_infos = []
|
||||
for p in sorted(matches)[:10]:
|
||||
try:
|
||||
st = os.stat(p)
|
||||
file_infos.append({
|
||||
"path": p,
|
||||
"size_bytes": st.st_size,
|
||||
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
|
||||
})
|
||||
except OSError:
|
||||
file_infos.append({"path": p, "error": "Could not stat file"})
|
||||
return {"type": "file", "resolved": resolved, "found": True, "matches": len(matches), "files": file_infos}
|
||||
return {"type": "file", "resolved": resolved, "found": False,
|
||||
"message": "No files matched the pattern.", "schema": {}}
|
||||
schema = _schema_for(matches[0])
|
||||
return {
|
||||
"type": "file", "resolved": resolved, "found": True,
|
||||
"matches": len(matches),
|
||||
"files": [_file_info(p) for p in matches[:10]],
|
||||
"schema": schema,
|
||||
}
|
||||
|
||||
# Exact path
|
||||
p = Path(resolved)
|
||||
if not p.exists():
|
||||
return {"type": "file", "resolved": resolved, "found": False, "message": f"File not found: {resolved}"}
|
||||
return {"type": "file", "resolved": resolved, "found": False,
|
||||
"message": f"File not found: {resolved}", "schema": {}}
|
||||
try:
|
||||
st = p.stat()
|
||||
schema = _schema_for(str(p))
|
||||
return {
|
||||
"type": "file",
|
||||
"resolved": resolved,
|
||||
"found": True,
|
||||
"type": "file", "resolved": resolved, "found": True,
|
||||
"matches": 1,
|
||||
"files": [{
|
||||
"path": str(p),
|
||||
"size_bytes": st.st_size,
|
||||
"modified": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
|
||||
}],
|
||||
"files": [_file_info(str(p))],
|
||||
"schema": schema,
|
||||
}
|
||||
except OSError as e:
|
||||
return {"type": "file", "resolved": resolved, "found": False, "message": str(e)}
|
||||
return {"type": "file", "resolved": resolved, "found": False,
|
||||
"message": str(e), "schema": {}}
|
||||
|
||||
@@ -10,6 +10,7 @@ from app.api.configs import router as configs_router
|
||||
from app.core.settings import get_settings
|
||||
from app.views.auth import router as auth_router
|
||||
from app.views.views import router as dashboard_router
|
||||
from app.views.config_views import router as config_views_router
|
||||
from app.views.docs import router as docs_router
|
||||
|
||||
|
||||
@@ -38,5 +39,6 @@ def create_app() -> FastAPI:
|
||||
app.include_router(configs_router)
|
||||
app.include_router(auth_router)
|
||||
app.include_router(dashboard_router)
|
||||
app.include_router(config_views_router)
|
||||
app.include_router(docs_router)
|
||||
return app
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Config list and editor views."""
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
from app.core.refdata import ReconPatterns, ReconConfigStatus
|
||||
from app.service.fake_configs import get_fake_configs
|
||||
|
||||
# Router for the HTML config pages; routes are registered without a prefix.
router = APIRouter(prefix="", tags=["Config_UI"])

# Third ancestor directory of this file; templates live in data/templates under it.
_project_root = Path(__file__).resolve().parents[2]
templates = Jinja2Templates(directory=str(_project_root.joinpath("data", "templates")))

# Option lists merged into the template context of the config editor page.
_EDITOR_CONTEXT = dict(
    patterns=[p.value for p in ReconPatterns],
    statuses=[s.value for s in ReconConfigStatus],
    frequencies=["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"],
    schema_types=["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"],
)
|
||||
|
||||
|
||||
@router.get("/configs", response_class=HTMLResponse)
async def configs_list_view(request: Request):
    """Render ``configs_list.html`` with the configs from ``get_fake_configs``."""
    page_context = {
        "title": "Recon Configs",
        "configs": get_fake_configs(datetime.now()),
    }
    return templates.TemplateResponse(
        request=request,
        name="configs_list.html",
        context=page_context,
    )
|
||||
|
||||
|
||||
@router.get("/configs/new", response_class=HTMLResponse)
async def config_new_view(request: Request):
    """Render ``config_editor.html`` in "new config" mode (no config loaded)."""
    page_context = {"title": "New Config", "config": None, "is_new": True}
    # Editor option lists are applied last, exactly as a trailing ** spread would.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
|
||||
|
||||
|
||||
@router.get("/configs/{reference}/edit", response_class=HTMLResponse)
async def config_edit_view(request: Request, reference: str):
    """Render ``config_editor.html`` for the config whose reference matches.

    The first config from ``get_fake_configs`` with a matching ``reference`` is
    passed to the template. When none matches, ``config`` is ``None`` and the
    page title falls back to "Edit Config".
    """
    matching = None
    for candidate in get_fake_configs(datetime.now()):
        if candidate.reference == reference:
            matching = candidate
            break

    if matching:
        heading = f"Edit — {reference}"
    else:
        heading = "Edit Config"

    page_context = {
        "title": heading,
        "config": matching,
        "reference": reference,
        "is_new": False,
    }
    # Editor option lists are applied last, exactly as a trailing ** spread would.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
|
||||
@@ -14,7 +14,6 @@ from fastapi.templating import Jinja2Templates
|
||||
from app.models.recon_job import ReconJob # noqa: F401 (validates that the real model imports cleanly)
|
||||
from app.service.fake_jobs import FakeReconJob, get_fake_jobs
|
||||
from app.service.fake_configs import FakeReconConfig, get_fake_configs
|
||||
from app.core.refdata import ReconPatterns, ReconConfigStatus
|
||||
|
||||
|
||||
router = APIRouter(prefix="", tags=["User_Interface"])
|
||||
@@ -507,58 +506,3 @@ async def job_detail_view(request: Request, job_id: int):
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# ── Config editor ────────────────────────────────────────────────────────────
|
||||
|
||||
# Option lists merged into the template context of the config editor page.
_EDITOR_CONTEXT = dict(
    patterns=[p.value for p in ReconPatterns],
    statuses=[s.value for s in ReconConfigStatus],
    frequencies=["Ad Hoc", "Intra Day", "Daily", "Weekly", "Monthly", "Quarterly"],
    schema_types=["str", "int", "float", "date('%Y-%m-%d')", "datetime('%Y-%m-%d %H:%M:%S')", "bool"],
)
|
||||
|
||||
|
||||
@router.get("/configs/new", response_class=HTMLResponse)
async def config_new_view(request: Request):
    """Render ``config_editor.html`` in "new config" mode (no config loaded)."""
    page_context = {"title": "New Config", "config": None, "is_new": True}
    # Editor option lists are applied last, exactly as a trailing ** spread would.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
|
||||
|
||||
|
||||
@router.get("/configs/{reference}/edit", response_class=HTMLResponse)
async def config_edit_view(request: Request, reference: str):
    """Render ``config_editor.html`` for the config whose reference matches.

    The first config from ``get_fake_configs`` with a matching ``reference`` is
    passed to the template. When none matches, ``config`` is ``None`` and the
    page title falls back to "Edit Config".
    """
    matching = None
    for candidate in get_fake_configs(datetime.now()):
        if candidate.reference == reference:
            matching = candidate
            break

    if matching:
        heading = f"Edit — {reference}"
    else:
        heading = "Edit Config"

    page_context = {
        "title": heading,
        "config": matching,
        "reference": reference,
        "is_new": False,
    }
    # Editor option lists are applied last, exactly as a trailing ** spread would.
    page_context.update(_EDITOR_CONTEXT)
    return templates.TemplateResponse(
        request=request,
        name="config_editor.html",
        context=page_context,
    )
|
||||
|
||||
|
||||
@router.get("/configs", response_class=HTMLResponse)
async def configs_list_view(request: Request):
    """Render ``configs_list.html`` with the configs from ``get_fake_configs``."""
    page_context = {
        "title": "Recon Configs",
        "configs": get_fake_configs(datetime.now()),
    }
    return templates.TemplateResponse(
        request=request,
        name="configs_list.html",
        context=page_context,
    )
|
||||
|
||||
Reference in New Issue
Block a user