Update app/r.py
This commit is contained in:
@@ -0,0 +1,175 @@
|
||||
# The job config is stored in the DB as a JSON document. These job config classes are used to conform the JSON.
|
||||
# This is done for several reasons:
|
||||
# - We have a record of what data was in the API call rather than a schema migrated record
|
||||
# - We don't need to maintain the many config tables that would result - just the job config JSON field.
|
||||
# - The pattern selected for a recon job defines what config is needed so a 3rd normal form would be complex
|
||||
# The recon workers use the loaded JSON as a dictionary so must deal with confg schema changes gracefully.
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Dict
|
||||
from enum import Enum
|
||||
import operator
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.core.refdata import ReconPatterns, ReconConfigStatus, ProfileFields
|
||||
from app.models.recon_auth import UserResponse
|
||||
from app.db.schema import ReconConfig as ReconConfigSchema
|
||||
from app.core.config import db_config_map
|
||||
|
||||
class Frequencies(str, Enum):
|
||||
AD_HOC = "Ad Hoc"
|
||||
INTRA_DAY = "Intra Day"
|
||||
DAILY = "Daily"
|
||||
WEEKLY = "Weekly"
|
||||
MONTHLY = "Monthly"
|
||||
QUARTERLY = "Quarterly"
|
||||
|
||||
|
||||
class ComparisonOperators(str, Enum):
|
||||
GREATER_THAN = "Greater Than"
|
||||
LESS_THAN = "Less Than"
|
||||
EQUAL_TO = "Equal To"
|
||||
|
||||
@property
|
||||
def to_symbol(self):
|
||||
mapping = {
|
||||
self.GREATER_THAN.value: ">",
|
||||
self.LESS_THAN.value: "<",
|
||||
self.EQUAL_TO.value: "=",
|
||||
}
|
||||
return mapping[self.value]
|
||||
|
||||
@property
|
||||
def to_operator(self):
|
||||
mapping = {
|
||||
self.GREATER_THAN.value: operator.gt,
|
||||
self.LESS_THAN.value: operator.lt,
|
||||
self.EQUAL_TO.value: operator.eq,
|
||||
}
|
||||
return mapping[self.value]
|
||||
|
||||
|
||||
class ProfileThreshold(BaseModel):
|
||||
profile_field: ProfileFields
|
||||
value: float
|
||||
test: ComparisonOperators
|
||||
|
||||
model_config = {
|
||||
"use_enum_values": True # Always output enum values
|
||||
}
|
||||
|
||||
|
||||
class CSVSpec(BaseModel):
|
||||
delimiter: str = Field(default=',')
|
||||
header: bool = Field(default=True)
|
||||
encoding: str = Field(default='utf_8')
|
||||
trailer_rows: int = Field(default=0)
|
||||
quoting: bool = Field(default=True)
|
||||
|
||||
|
||||
class XMLSpec(BaseModel):
|
||||
xpathstr: str = Field(default='./*')
|
||||
encoding: str = Field(default='utf-8')
|
||||
|
||||
|
||||
url_description = f"URL to source. If CSV, date can be specified via template which has these variables available: today, as_at_date, as_at_offset (job date + system config offset), today_offset. For DB connections we map connection name to correct name for the environment. Options are: {db_config_map.keys()} "
|
||||
url_examples = [
|
||||
"file:///Z:/PredatorArchive/PredatorArchive_Customer_File_{{as_at_date.strftime('%Y%m%d')}}.csv",
|
||||
"file:///Z:/PredatorArchive/PredatorArchive_Customer_File_{{today.strftime('%Y%m%d')}}.csv",
|
||||
"oracle://NETREVEAL?db=NETREVEAL&table=CUSTOMERS",
|
||||
"snowflake://DATAPRODUCT?db=PDP_ONYX&schema=ODS&table=CUSTOMERS_SCD",
|
||||
"mssql://GROUPDW?db=BIODS_PROCESSING&table=CUSTOMERS"
|
||||
]
|
||||
|
||||
schema_description = 'An ordered dictionary of column names & column types. Types can be "int","str","date(\'format\')","datetime(\'format\')","float"'
|
||||
schema_examples = [{},{"my column name":"str", "an integer column":"int", "Update_Datetime":"datetime(%Y-%m-%d %H:%M:%S.%f)", "a float field":"float"}]
|
||||
filter_description = "A list of filters to load only certain rows. For a SQL connection this will appear in the WHERE part of the SELECT statement. The comparison can use the date fields (today, as_at_date, as_at_offset, today_offset) with 'strftime' formatting."
|
||||
|
||||
|
||||
class SystemConfig(BaseModel):
|
||||
url: str = Field(description=url_description, examples=url_examples)
|
||||
name: str = Field(default="<system name>")
|
||||
comment: Optional[str] = Field(default="")
|
||||
csv_spec: Optional[CSVSpec] = Field(default=None)
|
||||
xml_spec: Optional[XMLSpec] = Field(default=None)
|
||||
day_offset: int = Field(default=0, description="When a config is run, for a given date, what is the number of days difference to the date in the filename or slq query.")
|
||||
system_schema: Dict[str, str] = Field(default={}, description=schema_description, examples=schema_examples)
|
||||
obfuscate_fields: list[str] = Field(default=[])
|
||||
pci_redact_fields: list[str] = Field(default=[])
|
||||
field_widths: list[int] = Field(default=[])
|
||||
profile_thresholds: List[ProfileThreshold] = Field(default=[])
|
||||
index_fields: list[str] = Field(default=[])
|
||||
filter: Optional[str] = Field(default="", description="If provided, appended to the sql query (file and database supported) after the WHERE clause.")
|
||||
sql: Optional[str] = Field(default="", description="If provided, replaces entire sql query (database supported, file tbd)")
|
||||
|
||||
|
||||
field_mapping_description = 'A dictionary of <source>:<destination> column names where column names differ but values should reconcile. For columns not mapped here, it is expected that source(s) and destination have the same column names. Note: With CSVs the names from the system_schema field will override the column names in the header so column matching can be done this way.'
|
||||
field_mapping_examples = [{},{"my source column 1":"my dest column 1"}]
|
||||
|
||||
|
||||
class ReconConfigRequest(BaseModel):
|
||||
reference: str = Field(default="Unnamed", description="User supplied id of the config")
|
||||
name: str = Field(default="Unnamed", description="Only used to name the results.")
|
||||
business_process: str = Field(default="Unnamed Process", description="Only used in the results.")
|
||||
comment: Optional[str] = None
|
||||
data_type: str = Field(default="Unknown", description="10000 foot view of what the data in the recon relates to. E.g. Customer or Transaction")
|
||||
sources: List[SystemConfig]
|
||||
destination: SystemConfig
|
||||
pattern: ReconPatterns
|
||||
status: ReconConfigStatus = Field(description="This controls where the result of recon jobs using this config go. 'draft' results will go to the draft snowflake tables, 'published' will go to the live snowflake tables which power the recon dashboards.")
|
||||
field_mapping: Dict[str, str] = Field(default={}, description=field_mapping_description, examples=field_mapping_examples)
|
||||
|
||||
# Unlike the other recon config attributes, these map to database columns, not part of the config JSON.
|
||||
frequency: Frequencies = Field(default = Frequencies.AD_HOC)
|
||||
start_datetime: Optional[datetime] = None
|
||||
|
||||
model_config = {
|
||||
"use_enum_values": True # Always output enum values
|
||||
}
|
||||
|
||||
|
||||
class ReconConfigResponse(ReconConfigRequest):
|
||||
"""
|
||||
Response when serializing
|
||||
"""
|
||||
# reference: str
|
||||
revision_number: int
|
||||
user: UserResponse
|
||||
|
||||
|
||||
class ReconConfig(ReconConfigRequest):
|
||||
"""
|
||||
Internal Representation of the config object.
|
||||
"""
|
||||
# reference: str
|
||||
revision_number: int
|
||||
username: str
|
||||
|
||||
model_config = {
|
||||
"from_attributes": True, # Ability to load from ORM model
|
||||
# "use_enum_values": False, # TODO: ORM loading needs to load enums properly
|
||||
}
|
||||
|
||||
|
||||
def config_from_schema(recon_config_schema: ReconConfigSchema) -> ReconConfig:
|
||||
""" Config uses the json 'config' field of the db to allow non-breaking schema changes
|
||||
but also adds read only fields from the other table columns.
|
||||
- 2 user writable fields are now in the ReconConfigSchema: frequency and start_datetime. This is to allow filtering of configs for scheduling.
|
||||
We use username over User object so we don't need to do an extra db lookup for the user table.
|
||||
"""
|
||||
recon_config_schema.config["reference"] = recon_config_schema.reference
|
||||
recon_config_schema.config["revision_number"] = recon_config_schema.revision_number
|
||||
recon_config_schema.config["username"] = recon_config_schema.username
|
||||
|
||||
if recon_config_schema.frequency is not None:
|
||||
recon_config_schema.config["frequency"] = recon_config_schema.frequency
|
||||
else:
|
||||
recon_config_schema.config["frequency"] = Frequencies.AD_HOC.value
|
||||
if recon_config_schema.start_datetime is not None:
|
||||
recon_config_schema.config["start_datetime"] = recon_config_schema.start_datetime.isoformat()
|
||||
else:
|
||||
recon_config_schema.config["start_datetime"] = None
|
||||
|
||||
recon_config_model = ReconConfig.model_validate(recon_config_schema.config)
|
||||
return recon_config_model
|
||||
|
||||
Reference in New Issue
Block a user