Skip to main content

Data Model

:::info Source Sourced from services/media-service/DATA_MODEL.md in the documentation repo. :::

1. Database

Postgres schema media. S3/R2 for blobs.

2. Tables

2.1 assets

CREATE TABLE media.assets (
id ULID PRIMARY KEY,
tenant_id UUID NOT NULL,
owner_user_id ULID NOT NULL,
kind TEXT NOT NULL CHECK (kind IN ('image','audio','video','document','subtitle','ai_image','ai_audio')),
source TEXT NOT NULL CHECK (source IN ('upload','ai_generated','imported')),
status TEXT NOT NULL CHECK (status IN ('uploading','scanning','transcoding','ready','failed','quarantined')),
mime TEXT,
bucket TEXT NOT NULL,
key TEXT NOT NULL,
size_bytes BIGINT,
sha256 CHAR(64),
original_filename TEXT,
width INT, height INT, duration_seconds INT,
ai_provenance JSONB,
retention_class TEXT NOT NULL DEFAULT 'operational',
retention_until TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now(),
deleted_at TIMESTAMPTZ
);
CREATE INDEX assets_by_tenant ON media.assets (tenant_id, created_at DESC);
CREATE INDEX assets_by_owner ON media.assets (tenant_id, owner_user_id);
CREATE INDEX assets_sha256_dedup ON media.assets (tenant_id, sha256);
CREATE INDEX assets_status ON media.assets (status);

2.2 variants

CREATE TABLE media.variants (
id ULID PRIMARY KEY,
asset_id ULID NOT NULL REFERENCES media.assets(id) ON DELETE CASCADE,
tenant_id UUID NOT NULL,
profile TEXT NOT NULL,
bucket TEXT NOT NULL,
key TEXT NOT NULL,
size_bytes BIGINT,
mime TEXT,
width INT, height INT, bitrate INT,
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX variants_by_asset ON media.variants (asset_id);

2.3 caption_tracks

CREATE TABLE media.caption_tracks (
id ULID PRIMARY KEY,
asset_id ULID NOT NULL,
tenant_id UUID NOT NULL,
language TEXT NOT NULL,
kind TEXT NOT NULL CHECK (kind IN ('captions','subtitles','descriptions')),
format TEXT NOT NULL CHECK (format IN ('vtt','srt')),
bucket TEXT NOT NULL,
key TEXT NOT NULL,
source TEXT NOT NULL CHECK (source IN ('ai','human','imported')),
ai_provenance JSONB,
approved BOOLEAN DEFAULT false,
reviewed_by ULID,
reviewed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now()
);

2.4 transcripts

CREATE TABLE media.transcripts (
id ULID PRIMARY KEY,
asset_id ULID NOT NULL,
tenant_id UUID NOT NULL,
language TEXT NOT NULL,
segments JSONB NOT NULL, -- array of { startMs, endMs, text, speakerLabel? }
source TEXT NOT NULL,
ai_provenance JSONB,
created_at TIMESTAMPTZ DEFAULT now()
);

2.5 transcode_jobs

CREATE TABLE media.transcode_jobs (
id ULID PRIMARY KEY,
asset_id ULID NOT NULL,
tenant_id UUID NOT NULL,
profile TEXT NOT NULL,
status TEXT NOT NULL,
attempts INT DEFAULT 0,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
error_message TEXT
);

2.6 ai_media_artifacts

CREATE TABLE media.ai_media_artifacts (
id ULID PRIMARY KEY,
tenant_id UUID NOT NULL,
kind TEXT NOT NULL,
prompt_ref TEXT,
output_asset_id ULID,
cost_micro_usd BIGINT,
ai_provenance JSONB NOT NULL,
created_at TIMESTAMPTZ DEFAULT now()
);

2.7 outbox, inbox

Standard.

3. RLS on every tenant-scoped table.

4. Storage (S3)

Structure: tenants/{tid}/media/{asset_id}/original, .../variants/{profile}/{asset_id}, .../captions/{lang}.vtt.

5. Retention

  • Operational (uploaded but not in a bundle): per tenant policy, default 90 days after last reference.
  • Bundle-referenced: indefinite or per bundle TTL.
  • AI-generated: same as operational unless authored.

6. Performance

  • CDN for variants + captions.
  • Signed URLs short-lived (10 min).
  • Hot assets in CDN; cold moves to Glacier after 1 year unused.

7. Migration

Additive only. Variant profiles extensible. New kinds additive.