From 8a89fe3403705462475fdf4c185db9c08a108d1b Mon Sep 17 00:00:00 2001 From: gramps Date: Thu, 2 Apr 2026 17:07:18 -0700 Subject: [PATCH] Architecture session: queue topology, log schema, REC templates, config refinements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename env.toml to env_{dev,qa,prod}.toml for self-documentation; enforce via gitignore - Config loader selects env file via BEDS_ENV environment variable, defaults to dev - Set wbid to "ms" in beds.toml - Define queue topology: rel/rec .read .write .obj, log, adm, mig - Define log event schema: compound event_id (node.env.guid), parent_id, depth, level/level_val, resource, service, env, node, file, method, line, trace, message, created - Add example_rec.toml — canonical self-documenting REC template for future developers - Add mst_logger_rec.toml — logger collection template derived from log event schema Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 4 +- config/beds.toml | 2 +- src/config/mod.rs | 5 +- templates/example_rec.toml | 449 ++++++++++++++++++++++++++++++++++ templates/mst_logger_rec.toml | 254 +++++++++++++++++++ 5 files changed, 711 insertions(+), 3 deletions(-) create mode 100644 templates/example_rec.toml create mode 100644 templates/mst_logger_rec.toml diff --git a/.gitignore b/.gitignore index 3a04e15..0e60c31 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target -config/env.toml \ No newline at end of file +config/env_dev.toml +config/env_qa.toml +config/env_prod.toml \ No newline at end of file diff --git a/config/beds.toml b/config/beds.toml index bfafadf..f277bfe 100644 --- a/config/beds.toml +++ b/config/beds.toml @@ -7,7 +7,7 @@ journal_on = false [id] env_name = "production" version = "1.0" -wbid = "ga" +wbid = "ms" [broker_services] queue_tag = "prod_" diff --git a/src/config/mod.rs b/src/config/mod.rs index 7c97e39..94b23e4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -10,9 +10,12 @@ pub enum 
ConfigError { } pub fn load() -> Result { + let env = std::env::var("BEDS_ENV").unwrap_or_else(|_| "dev".to_string()); + let env_file = format!("config/env_{}.toml", env); + let cfg = Config::builder() .add_source(File::new("config/beds.toml", FileFormat::Toml)) - .add_source(File::new("config/env.toml", FileFormat::Toml).required(false)) + .add_source(File::new(&env_file, FileFormat::Toml).required(false)) .build()?; Ok(cfg.try_deserialize()?) diff --git a/templates/example_rec.toml b/templates/example_rec.toml new file mode 100644 index 0000000..5043684 --- /dev/null +++ b/templates/example_rec.toml @@ -0,0 +1,449 @@ +# ============================================================================= +# BEDS REC Template — MongoDB Document Store +# ============================================================================= +# +# This is the canonical example template for a REC (document store) collection. +# Copy this file, rename it to match your collection (e.g.: users_rec.toml), +# and edit the values to define your schema. +# +# NAMING CONVENTIONS: +# ------------------- +# Template file: {collection_name}_rec.toml +# Collection name: {wbid}{CollectionName} e.g.: gaUsers (wbid "ga" from beds.toml [id]) +# Extension (TLA): _{3-char-identifier} e.g.: _usr +# All field names: {field_name}_{tla} e.g.: email_usr, status_usr +# +# The TLA (three-letter abbreviation) is appended to the collection name and +# to EVERY field name within the collection. This convention eliminates all +# ambiguity in queries, joins, and log output. BEDS enforces this at template +# validation time — templates that violate naming conventions are rejected. 
+# +# AUTHOR: beds +# VERSION: 1.0 +# ============================================================================= + + +# ============================================================================= +# IDENTITY +# ============================================================================= + +# Template version — increment when the schema changes. Not the BEDS version. +version = 1 + +# The BEDS node service this collection lives on. +# Valid values: "app_server" | "admin" | "segundo" | "tercero" | or any +# custom node name defined in your config file. +service = "app_server" + +# Storage engine for this template. REC templates always use "rec". +# Valid values: "rec" | "rel" +# This value is used by BEDS to select the correct adapter at instantiation. +schema = "rec" + +# Human-readable name for this template. Used in logging, admin UI, and the +# REST API catalog. Must be unique across all templates. +template_class = "ExampleCollection" + +# MongoDB collection name. Convention: {wbid}{ClassName} — no underscores. +# The wbid is your 2-char corporate identifier declared in beds.toml [id]. +# Example: wbid "ga" + class "Example" = "gaExample" +collection = "{wbid}Example" + +# Three-letter abbreviation (TLA) for this collection — appended to the +# collection name and to every field name. Must be unique across all templates. +# Example: "_usr", "_ord", "_log", "_ses" +extension = "_exa" # exa = example + +# Warehousing destination template name. Set to the name of the COOL storage +# template if this collection supports warehousing. Leave empty if not used. +wh_template = "" + + +# ============================================================================= +# BEHAVIOURAL FLAGS +# These flags control what operations BEDS will allow on this collection. +# ============================================================================= + +# closed_class: when true, only internal BEDS services may instantiate this +# template. 
When false, registered external partners may also access it. +# Default: true — restrict access unless you explicitly open it. +closed_class = true + +# hard_deletes: when true, DELETE operations permanently remove the record. +# When false, DELETE sets status to inactive (soft delete). Soft deletes are +# the safer default — records are recoverable. +hard_deletes = false + +# updates_enabled: when true, UPDATE operations are permitted on this +# collection. When false, records are immutable after insert. Log collections +# should always be false. +updates_enabled = true + +# auditing: controls which operations generate an audit trail. +# Valid values: +# "disabled" — no auditing +# "destructive" — audit writes and deletes only +# "nondestructive" — audit reads only +# "full" — audit all operations +auditing = "nondestructive" + +# journaling: when true, BEDS records a journal entry for every destructive +# operation (create, update, delete) on this collection. Requires the journal +# service to be active. Disable for high-volume append-only collections. +journaling = true + +# record_history: when true, BEDS maintains a full history of every version +# of each record. High storage cost — use only where full history is required. +record_history = false + +# default_status: the status value assigned to new records on insert. +# Must be a value defined in the BEDS status catalog. +default_status = "active" + +# search_status: the default status filter applied to fetch queries when no +# status filter is supplied by the caller. Protects against accidentally +# returning inactive or deleted records. +search_status = "active" + +# record_locking: when true, BEDS applies an optimistic lock on records during +# updates to prevent concurrent write conflicts. Enable for collections where +# simultaneous updates to the same record are possible. +record_locking = false + +# query_timers: when true, BEDS records query execution time for every +# operation on this collection. 
Timer data is published as a metric event. +# Recommended true in all environments for performance visibility. +query_timers = true + +# primary_key: the field used as the primary key for this collection. +# "token" uses the BEDS-generated GUID token (recommended). +# "mongo_id" uses MongoDB's native _id field. +primary_key = "token" + +# tokens: when true, BEDS generates and manages a GUID token field (db_token) +# for every record. This is the externally-exposed identifier — the internal +# MongoDB _id is never returned to clients. Strongly recommended true. +tokens = true + +# cache_ttl: number of seconds a fetched record remains in the in-process +# cache. 0 = caching disabled for this collection. Disable for collections +# with high write frequency where stale reads are unacceptable. +cache_ttl = 300 + +# is_internal: when true, marks this as a BEDS internal system collection. +# Internal collections are excluded from the public REST API catalog. +# Set true for logging, metrics, audit, and other framework collections. +is_internal = false + + +# ============================================================================= +# FIELDS +# Defines every field in the collection and its data type. +# BEDS validates all incoming data against this map before insertion. +# Fields not declared here are silently dropped. +# +# Field name convention: {field_name}_{tla} e.g.: email_usr +# +# Valid types: +# "string" — UTF-8 text +# "integer" — whole number +# "double" — floating point number +# "boolean" — true | false +# "object" — embedded document (sub-object) +# "array" — flat array of values +# "date" — MongoDB Date object (required for TTL indexes) +# ============================================================================= + +[fields] +# System fields — present in every REC template. Do not remove. 
+_id = "object" # MongoDB native document ID — never returned to clients +db_token = "string" # BEDS GUID token — the externally-exposed primary key +event_guid = "string" # broker event GUID — links this record to its origin event +status = "string" # record status (active | inactive | deleted) +created = "integer" # epoch timestamp — record creation time +accessed = "integer" # epoch timestamp — last read time + +# Application fields — define your schema below. +# Remember: every field name must end with the collection TLA (_exa here). +name_exa = "string" +score_exa = "double" +count_exa = "integer" +flag_exa = "boolean" +tags_exa = "array" # flat array — each element is a string +meta_exa = "object" # embedded sub-document +sub_exa = "array" # sub-collection — array of objects (see [sub_collections]) + + +# ============================================================================= +# PROTECTED FIELDS +# Fields that clients cannot modify or delete. Attempts to update or remove +# protected fields are silently dropped (or rejected, depending on config). +# Minimally: db_token, event_guid, created, accessed, _id +# ============================================================================= + +protected_fields = [ + "_id", + "db_token", + "event_guid", + "created", + "accessed", +] + + +# ============================================================================= +# INDEX FIELDS +# Authoritative list of every field that participates in ANY index declaration +# below. BEDS uses this list to validate queries at submission time — a query +# that cannot be satisfied by a declared index is rejected before execution. +# +# If a field is not in this list, it cannot be used as a query discriminant. +# Warehouse indexes are limited to: created, db_token, event_guid only. 
+# ============================================================================= + +index_fields = [ + "_id", + "db_token", + "event_guid", + "status", + "created", + "accessed", + "name_exa", + "score_exa", + "count_exa", + "flag_exa", + "meta_exa", +] + + +# ============================================================================= +# INDEX NAME REGISTRY +# All explicitly-named indexes (compound, multikey) must be registered here. +# Single-field indexes and unique indexes do not require names. +# If no named indexes are declared below, set to empty array: [] +# ============================================================================= + +index_name_list = [ + "cIdx1Exa", # compound index — count_exa + score_exa + "mIdx1Exa", # multikey index — tags_exa sub-field +] + + +# ============================================================================= +# SINGLE-FIELD INDEXES +# One entry per field. Sort direction: 1 = ascending, -1 = descending. +# _id is the default MongoDB index — never declare it here. +# If a field is declared here, do NOT also declare it as a compound index field +# that is its own single entry. +# ============================================================================= + +[single_field_indexes] +db_token = 1 # token should always be indexed +event_guid = 1 # event GUID should always be indexed +status = -1 # fetch by status is a common query pattern +created = -1 # descending — most recent first +flag_exa = 1 +name_exa = 1 + + +# ============================================================================= +# COMPOUND INDEXES +# Format: index_name = [ [field, direction], [field, direction], ... ] +# MongoDB does not use index labels internally — the name is for BEDS only. +# The index name must appear in index_name_list above. 
+# ============================================================================= + +[compound_indexes] +cIdx1Exa = [["count_exa", 1], ["score_exa", -1]] + + +# ============================================================================= +# MULTIKEY INDEXES +# For fields declared as "array" type. MongoDB automatically applies multikey +# behaviour to array fields declared in single or compound indexes. +# Use dot notation to index a subset of array sub-fields. +# The index name must appear in index_name_list above. +# Format: index_name = [ [dot.notation.field, direction], ... ] +# ============================================================================= + +[multikey_indexes] +mIdx1Exa = [["tags_exa.label", 1]] + + +# ============================================================================= +# UNIQUE INDEXES +# MongoDB rejects inserts/updates that would create a duplicate value. +# db_token should always be unique. Add other fields as required. +# Format: field_name = sort_direction +# ============================================================================= + +[unique_indexes] +db_token = 1 # BEDS token is always unique — do not remove + + +# ============================================================================= +# PARTIAL INDEXES +# Indexes that only include documents matching a filter expression. +# More efficient than full indexes for sparse or conditional data. +# Format: one [[partial_indexes]] table per index: field, direction, filter. +# +# Examples below: meta_exa indexed where created exists, and where it does not.
+# ============================================================================= + +[[partial_indexes]] +field = "meta_exa" +direction = 1 +filter = { "created" = { "$exists" = true } } + +[[partial_indexes]] +field = "meta_exa" +direction = 1 +filter = { "created" = { "$exists" = false } } + + +# ============================================================================= +# TTL INDEXES +# Automatically delete documents after a specified number of seconds. +# Can only be applied to fields of type "date" or arrays of dates. +# MongoDB uses the earliest date in an array field to calculate expiry. +# Format: field_name = seconds_until_expiry +# ============================================================================= + +[ttl_indexes] +# accessed = 86400 # example: expire records not accessed in 24 hours +# # commented out — enable only if TTL expiry is required + + +# ============================================================================= +# CACHE MAP +# Maps internal field names (schema) to external field names (client-facing). +# This is the ONLY mechanism by which schema column names are exposed to +# clients. Fields not in the cache map are never returned to clients. +# If cache_map is empty, all fields in exposed_fields are returned as-is. +# +# Format: internal_field_name = "external_label" +# ============================================================================= + +[cache_map] +db_token = "id" +event_guid = "eventId" +status = "status" +created = "createdDate" +accessed = "accessedDate" +name_exa = "name" +score_exa = "score" +count_exa = "count" +flag_exa = "flag" +tags_exa = "tags" +meta_exa = "meta" +sub_exa = "items" + + +# ============================================================================= +# EXPOSED FIELDS +# When cache_map is null or disabled, this list controls which fields are +# returned to clients. If both cache_map and exposed_fields are null, all +# fields are returned. 
_id and internal system fields should never be exposed. +# Leave null if using cache_map — cache_map always takes precedence. +# ============================================================================= + +# exposed_fields = null # null = use cache_map (recommended) + + +# ============================================================================= +# BINARY FIELDS +# Fields that contain binary data require special handling by the adapter. +# List any binary fields here. null = no binary fields. +# ============================================================================= + +# binary_fields = null + + +# ============================================================================= +# REGEX FIELDS +# Fields from index_fields that support regex pattern matching in queries. +# This does not create an index — it controls when BEDS applies a regex +# operand to a query filter. Only declare string fields here. +# ============================================================================= + +regex_fields = ["name_exa"] + + +# ============================================================================= +# SUB-COLLECTIONS +# Implements 1:M relationships at the document level. A sub-collection is an +# array of objects embedded within the parent document. +# +# Example: a "questions" collection with an embedded "answers" sub-collection. +# +# Each sub-collection field must also appear in [fields] (to declare its type) +# and in [cache_map] or exposed_fields (to control client visibility). +# +# Sub-collection elements can be inserted or deleted independently without +# modifying the parent document's other fields. +# +# Format: +# [sub_collections.field_name] +# fields = ["sub_field_1", "sub_field_2", ...] +# ============================================================================= + +[sub_collections.sub_exa] +# These are the fields within each element of the sub_exa array. +# They must also appear in [fields] above with their types declared. 
+fields = [ + "count_exa", + "score_exa", + "name_exa", + "flag_exa", + "db_token", +] + + +# ============================================================================= +# WAREHOUSING +# Controls data lifecycle management — moving records from HOT (production) +# storage to COOL (warehoused) or COLD (archived/CSV) storage over time. +# +# HOT — live production data +# COOL — warehoused, maintains schema, indexing changes allowed +# COLD — archived, reformatted to destination schema (usually CSV) +# WARM — data being restored from COLD back to HOT +# ============================================================================= + +[warehouse] +# supported: master switch. Must be true for any warehousing to occur. +# If false, all warehousing requests for this collection are rejected. +supported = false + +# remote_support: when true, allows importing data into this collection from +# a remote source outside the BEDS framework (e.g.: a legacy system migration). +remote_support = false + +# automated: when true, BEDS will automatically warehouse records when the +# qualifier condition is met, on the defined interval schedule. +automated = false + +# dynamic: when true, allows on-demand warehousing requests outside the +# scheduled interval. Requires override = true to allow custom query filters. +dynamic = false + +# interval: how often automated warehousing runs. +# Valid values: "D" = daily | "M" = monthly (1st) | "Q" = quarterly | "Y" = yearly +interval = "M" + +# override: when true AND dynamic = true, the caller may supply a custom +# query filter in the warehousing request, overriding the default qualifier. +override = false + +# delete: what to do with source records after successful warehousing. +# Valid values: "H" = hard delete | "S" = soft delete +delete = "H" + +# qualifier: the filter that identifies records eligible for warehousing. +# Format matches a BEDS fetch query — field, operand, operator, value. 
+# The empty value below is replaced at runtime with the caller-supplied date. +# +# This example: warehouse all active records created before a supplied date. +[warehouse.qualifier] +created = { operand = "null", operator = "lt", value = "" } # empty = caller supplies value at runtime +status = { operand = "null", operator = "eq", value = "active" } +logical_op = "and" diff --git a/templates/mst_logger_rec.toml b/templates/mst_logger_rec.toml new file mode 100644 index 0000000..8aaae55 --- /dev/null +++ b/templates/mst_logger_rec.toml @@ -0,0 +1,254 @@ +# ============================================================================= +# BEDS REC Template — Logger Collection +# ============================================================================= +# +# This template defines the schema for the BEDS logging collection. +# All BEDS nodes publish log events to this collection via the LOG exchange. +# Routing key format: log.{env}.{level} +# +# DESIGN NOTES: +# ------------- +# - Append-only: updates are disabled. Log records are immutable. +# - No caching: recursion risk and log data is never stale-read. +# - No auditing: logging the logger creates infinite recursion. +# - No journaling: same reason. +# - No record locking: append-only makes locking unnecessary. +# - Hard deletes enabled: log pruning/warehousing removes records permanently. +# - is_internal = true: excluded from the public REST API catalog. +# - TTL index on created: logs expire automatically per retention policy.
+# +# AUTHOR: ms +# VERSION: 1.0 +# ============================================================================= + + +# ============================================================================= +# IDENTITY +# ============================================================================= + +version = 1 +service = "admin" +schema = "rec" +template_class = "Logger" +collection = "msLogs" +extension = "_log" +wh_template = "" # NOTE(review): [warehouse] supported = true below, yet no COOL template is named here — confirm this is intended + + +# ============================================================================= +# BEHAVIOURAL FLAGS +# ============================================================================= + +closed_class = true # internal framework collection — no external access +hard_deletes = true # log pruning permanently removes records +updates_enabled = false # log records are immutable after insert +auditing = "disabled" # never audit the logger — infinite recursion +journaling = false # never journal the logger — same reason +record_history = false # no history tracking on log records +default_status = "active" +search_status = "active" +record_locking = false # append-only — locking unnecessary +query_timers = false # disabled — timer events would log, causing recursion +primary_key = "token" +tokens = true +cache_ttl = 0 # no caching — log data is never stale-read +is_internal = true # excluded from public REST API catalog + + +# ============================================================================= +# FIELDS +# ============================================================================= + +[fields] +# System fields +_id = "object" # MongoDB native document ID — never returned to clients +db_token = "string" # BEDS GUID token — externally-exposed primary key +status = "string" # record status + +# Event lineage +event_id = "string" # compound event ID: node.env.guid — broker event identifier +parent_id = "string" # parent broker event compound ID — empty string if root event +depth = "integer" # levels from root event (0 = root) + +# Log level
+level_log = "string" # log level label: debug|data|info|error|warning|fatal|timer|event +level_val = "integer" # log level integer: -1 through 7 (enables range queries) + +# Origin +resource = "string" # 4-char component identifier e.g. LOGR — matches console output tag +service_log = "string" # node role that issued the event e.g. app_server|admin|logger +env_log = "string" # environment: dev|qa|prod +node_log = "string" # node name from config e.g. registered_users + +# Source location +file_log = "string" # source file where the log call originated +method_log = "string" # calling method name +line_log = "integer" # line number of the log call + +# Payload +message_log = "string" # the log message text +trace_log = "array" # stack trace — empty array unless trace=true passed to logger + +# Timestamp +created = "integer" # epoch timestamp — record creation time + + +# ============================================================================= +# PROTECTED FIELDS +# ============================================================================= + +protected_fields = [ + "_id", + "db_token", + "event_id", + "parent_id", + "depth", + "created", +] + + +# ============================================================================= +# INDEX FIELDS +# ============================================================================= + +index_fields = [ + "_id", + "db_token", + "event_id", + "parent_id", + "depth", + "level_val", + "level_log", + "node_log", + "env_log", + "service_log", + "created", + "status", +] + + +# ============================================================================= +# INDEX NAME REGISTRY +# ============================================================================= + +index_name_list = [ + "cIdx1Log", # compound: event_id + depth — full lineage traversal + "cIdx2Log", # compound: env_log + level_val — range queries by env and severity +] + + +# ============================================================================= +# SINGLE-FIELD 
INDEXES +# ============================================================================= + +[single_field_indexes] +db_token = 1 +parent_id = 1 # traverse up the event tree +level_log = 1 # filter by level label +node_log = 1 # filter by originating node +created = -1 # most recent first +status = -1 + + +# ============================================================================= +# COMPOUND INDEXES +# ============================================================================= + +[compound_indexes] +cIdx1Log = [["event_id", 1], ["depth", 1]] # reconstruct full event tree +cIdx2Log = [["env_log", 1], ["level_val", 1]] # prod fatals, dev debug, etc. + + +# ============================================================================= +# MULTIKEY INDEXES +# ============================================================================= + +# trace_log is an array but we do not index it — trace data is fetched by +# event_id, not searched. No multikey indexes required for this collection. + + +# ============================================================================= +# UNIQUE INDEXES +# ============================================================================= + +[unique_indexes] +db_token = 1 + + +# ============================================================================= +# PARTIAL INDEXES +# ============================================================================= + +# No partial indexes for this collection. + + +# ============================================================================= +# TTL INDEXES +# Log retention policy — records expire automatically. +# Adjust the value to match your retention requirement. +# 2592000 = 30 days +# NOTE(review): MongoDB TTL indexes only expire documents whose indexed field +# holds a BSON Date (or an array of Dates). `created` is declared "integer" in +# [fields] — confirm the adapter stores a Date, or these records never expire. +# ============================================================================= + +[ttl_indexes] +# 2592000 = 30 days. Adjust to match your log retention policy.
+created = 2592000 + + +# ============================================================================= +# CACHE MAP +# Log records are internal only — cache map controls what the admin UI sees. +# Schema field names are never exposed externally. +# ============================================================================= + +[cache_map] +db_token = "id" +event_id = "eventId" +parent_id = "parentId" +depth = "depth" +level_log = "level" +level_val = "levelValue" +resource = "resource" +service_log = "service" +env_log = "env" +node_log = "node" +file_log = "file" +method_log = "method" +line_log = "line" +message_log = "message" +trace_log = "trace" +created = "createdDate" + + +# ============================================================================= +# REGEX FIELDS +# ============================================================================= + +regex_fields = ["message_log", "file_log"] + + +# ============================================================================= +# SUB-COLLECTIONS +# ============================================================================= + +# No sub-collections for this collection. + + +# ============================================================================= +# WAREHOUSING +# Log records are eligible for warehousing once they exceed the retention +# threshold. Warehouse to COOL storage to maintain schema for audit purposes. +# ============================================================================= + +[warehouse] +supported = true +remote_support = false +automated = true +dynamic = false +interval = "M" # warehouse monthly +override = false +delete = "H" # hard delete source records after warehousing + +[warehouse.qualifier] +created = { operand = "null", operator = "lt", value = "" } # caller supplies cutoff date +status = { operand = "null", operator = "eq", value = "active" } +logical_op = "and"