// Candidate: one extracted unit with its identity, classification, extracted
// facts, image references and provenance.
// The header arrow reads as "emitted by C, consumed by B, D and E"; the stage
// letters themselves are not defined in this view.
Candidate { // C → B, D, E
// Per-candidate identifier, derived from where the unit was found.
candidate_id // hash(normalized_url, unit_index)
// Hash of the content, used to recognise a re-encounter or trigger a re-mine.
content_hash // re-encounter / re-mine
// Grouping key only: several candidates may share the same value, so it must
// not be treated as a primary key.
canonical_entity_key // norm(entity)+category+province — NOT unique
entity_name, aliases[]
category_id, province_id, element_id, origin
// Only the 3 pilot categories have a typed shape; presumably free-form for
// all other categories — TODO confirm.
attributes{} // category-specific; typed for the 3 pilot cats
// NOTE(review): the per-fact field is `source_lang`, while the candidate-level
// field below is `source_language` — confirm the naming difference is intended.
facts[]{ claim, source_url, source_lang, quote }
// References to images only; no image payload is carried on the candidate.
images[]{ source_url, caption, license_hint } // refs only
// A trailing `?` appears to mark an optional field (here: doc_page).
provenance{ lead_chain[], crawl_path[], doc_page? }
source_language, extraction_version, extraction_confidence
}
// Decision: the verdicts reached for one Candidate (cultural gate, novelty,
// cross-validation) together with the confidence, rationale and evidence
// behind them.
// The header arrow reads as "emitted by D, consumed by E", with the trace also
// surfaced on a dashboard; stage letters are not defined in this view.
Decision { // D → E ; trace → dashboard
// Identifier that Entry.decision_id refers to. How it is derived is not
// specified in this view — TODO confirm (candidate_id alone may not be enough
// if a candidate can be decided more than once).
decision_id
// Back-reference to the Candidate being decided, plus its grouping key.
candidate_id, canonical_entity_key
cultural_gate pass | reject
novelty_verdict skip | enrich | net-new
// Optional: populated only for a duplicate skip or an enrich verdict, i.e.
// when an existing PDBI record is the target.
target_pdbi_id? // set on skip(dup) / enrich
cross_validation accept | reject
confidence, rationale, evidence_refs{ neighbors[], sources[] }
// Optional labels that feed calibration.
labels{ is_duplicate?, is_accepted? } // → calibration
// Points at the recorded trace for this decision.
trace_id // assembled context + tool calls + output
}
// Entry: the record prepared for PDBI from a decided Candidate, plus its
// publication and audit bookkeeping.
// The header arrow reads as "emitted by E, sent to PDBI, then fed back to B
// for re-encoding"; stage letters are not defined in this view.
Entry { // E → PDBI ; → B re-encode
// Links back to the originating Candidate and to the Decision behind this
// entry — presumably; confirm Decision exposes a matching identifier.
candidate_id, decision_id
// Rollout mode of this entry; the exact semantics of each value are not
// defined in this view.
mode shadow | trickle | live
// `update` carries the id of the existing PDBI record to modify
// (target_pdbi_id is the optional field declared on Decision).
action create | update(target_pdbi_id)
title, field_category, field_element, field_province, field_from
field_description // dense factual HTML; written for OSAN indexing
// Optional: only available once the upload has produced a berkas reference.
field_file? // berkas ref after upload
citations[], image{ object_ref, generated_caption, pdbi_berkas_ref }
// pdbi_id is optional — presumably unset until the entry exists in PDBI;
// confirm how it relates to target_pdbi_id for `update` actions.
// NOTE(review): sample_reason looks meaningful only when audit_sampled is
// set — confirm whether it should be marked optional.
pdbi_id?, publish_status, reencoded, audit_sampled, sample_reason
}