About This Code Showcase
This curated code showcase demonstrates the core translation pipeline: how the AI translation prompt encodes three quality standards with glossary injection, how the stage gate logic enforces human governance, and how the recursive unwrapping function handles structured AI output.
Environment configuration and deployment scripts are omitted for clarity. This showcase highlights the AI agent prompts, backend stage logic, and output parsing.
File Structure
projects/bahai-chinese-translation-workbench/
├── app.py # FastAPI server — routes, stage gate logic
├── agents.py # AI agents — translation & editing prompts
├── db.py # SQLite database — schema, CRUD, audit log
├── glossary.json # 20-term terminology glossary (JSON)
├── index.html # Frontend UI — 3-stage pipeline interface
├── requirements.txt # Python dependencies
├── Dockerfile # Container build for Cloud Run
├── start.bat # Windows quick-start script
└── workbench.db # SQLite database file (auto-created)
Three-Standard Translation Prompt
The translation agent's system prompt encodes the three quality standards and injects the full terminology glossary. This is the core of how the AI understands the translation requirements.
# Stage 1 system prompt: encodes the three quality standards (accuracy,
# beauty, consistency) and embeds a literal "{glossary_block}" placeholder.
# translation_agent fills the placeholder with str.replace() — not
# str.format() — before each API call, so no brace-escaping is needed here.
TRANSLATION_SYSTEM_PROMPT = """You are translating Baha'i Sacred Writings into Chinese (简体中文).
Three standards govern your translation:
1. ACCURACY (准确): Faithful to the original meaning. Never add, omit, or reinterpret.
2. BEAUTY (文风优美): Elevated, literary Chinese register. Not colloquial.
The language must carry the weight and dignity of sacred scripture.
Follow the poetic, classical-influenced modern Chinese style —
not contemporary casual language.
3. CONSISTENCY (风格一致): Consistent with the translation style established
by Shoghi Effendi (the Guardian). Use formal, classical-influenced
modern Chinese. Use Chinese punctuation marks (,。;:!?""''《》).
TERMINOLOGY GLOSSARY — You MUST use these approved translations:
{glossary_block}
RULES:
- Translate the complete text. Do not summarize or skip any passage.
- Preserve paragraph structure from the source.
- For terms in the glossary, use the approved Chinese translation exactly.
- For proper nouns not in the glossary, transliterate and add the original
in parentheses on first occurrence.
- Do not add explanatory notes or commentary within the translation itself.
Return your output as JSON with these keys:
- "translation": the complete Chinese translation (string)
- "term_usage": list of glossary terms you applied
- "notes": any translator notes on difficult passages or choices made
Return ONLY the JSON object, no other text."""
def translation_agent(source_text, source_lang, glossary):
    """Stage 1: produce a Chinese draft translation of *source_text*.

    Injects the formatted glossary into the system prompt, calls the model,
    and returns a dict with keys "translation", "term_usage", and "notes".
    When the model reply cannot be parsed as the expected JSON object, the
    raw reply text is returned as the translation with a warning note.
    """
    api = _get_client()
    prompt = TRANSLATION_SYSTEM_PROMPT.replace(
        "{glossary_block}", format_glossary_for_prompt(glossary)
    )
    language_names = {"en": "English", "ar": "Arabic", "fa": "Persian"}
    lang_label = language_names.get(source_lang, "English")
    reply = api.messages.create(
        model=MODEL,
        max_tokens=4096,
        temperature=0.3,
        system=prompt,
        messages=[{"role": "user", "content": f"Translate the following {lang_label} text into Chinese:\n\n{source_text}"}],
    )
    raw_text = reply.content[0].text
    parsed = _parse_json_response(raw_text)
    if not (parsed and "translation" in parsed):
        # Fallback: keep the raw model output so nothing is lost.
        return {"translation": raw_text, "term_usage": [],
                "notes": "Warning: Could not parse structured response."}
    return {"translation": parsed["translation"],
            "term_usage": parsed.get("term_usage", []),
            "notes": parsed.get("notes", "")}
Stage Gate Logic
The backend enforces strict stage sequencing. The review endpoint validates that the document is at Stage 2 and routes based on the human reviewer's decision (approve, edit, or reject).
@app.post("/api/documents/{doc_id}/review")
def review_doc(doc_id: int, req: ReviewRequest):
    """Stage 2 gate: record the human reviewer's decision on a document.

    Valid decisions: "approve" (promote the Stage 1 draft unchanged),
    "edit" (promote the reviewer's edited text), or "reject" (audit-log
    only; the document stays at Stage 2).  Approve/edit advance the
    document to Stage 3.

    Raises:
        HTTPException 404: unknown document.
        HTTPException 400: document not at Stage 2, unrecognized decision,
            or "edit" submitted without edited text.
        HTTPException 500: no Stage 1 output exists for the document.
    """
    doc = get_document(doc_id)
    if doc is None:
        raise HTTPException(status_code=404, detail="Document not found")
    if doc["current_stage"] != 2:
        raise HTTPException(status_code=400,
                            detail="Document is not at Stage 2 (review)")
    stages = get_stage_outputs(doc_id)
    stage1_output = next((s for s in stages if s["stage"] == 1), None)
    # A document at Stage 2 with no Stage 1 record is an internal
    # inconsistency, not a client error — fail loudly instead of crashing
    # with a bare TypeError on the subscript below.
    if stage1_output is None:
        raise HTTPException(status_code=500,
                            detail="Stage 1 output missing for document")
    if req.decision == "approve":
        save_stage_output(doc_id=doc_id, stage=2,
                          input_text=stage1_output["output_text"],
                          output_text=stage1_output["output_text"],
                          operator="human", human_notes=req.notes)
        log_audit(doc_id, "stage2_approved")
        update_document_stage(doc_id, 3)
    elif req.decision == "edit":
        # An edit decision without replacement text would silently persist
        # an empty Stage 2 output — reject it up front.
        if not req.edited_text:
            raise HTTPException(status_code=400,
                                detail="Decision 'edit' requires edited_text")
        save_stage_output(doc_id=doc_id, stage=2,
                          input_text=stage1_output["output_text"],
                          output_text=req.edited_text,
                          operator="human", human_notes=req.notes)
        log_audit(doc_id, "stage2_edited")
        update_document_stage(doc_id, 3)
    elif req.decision == "reject":
        log_audit(doc_id, "stage2_rejected")
    else:
        # Previously an unknown decision fell through and returned success
        # without doing anything.
        raise HTTPException(status_code=400,
                            detail=f"Unknown decision: {req.decision}")
    return _build_response(doc_id)
Recursive JSON Unwrapping
LLMs sometimes wrap output in markdown code fences or nested JSON. The _unwrap_text function iteratively strips these layers — up to five passes — to extract the actual translation text.
def _unwrap_text(raw, target_keys=None):
    """Peel markdown fences and JSON wrappers off *raw*, up to five passes.

    target_keys: JSON keys whose string values count as the payload
        (defaults to ['translation', 'edited_text', 'typeset_text']).
    Returns the innermost plain text found, or the last intermediate form
    when no further layer can be removed.
    """
    keys = (["translation", "edited_text", "typeset_text"]
            if target_keys is None else target_keys)
    text = raw
    for _ in range(5):
        text = _strip_markdown_fences(text)
        # Anything that doesn't look like a JSON object is final text.
        if not text.strip().startswith("{"):
            break
        try:
            payload = json.loads(text)
            inner = next((payload[k] for k in keys
                          if k in payload and isinstance(payload[k], str)),
                         None)
        except (json.JSONDecodeError, TypeError):
            break
        if not inner:
            break
        # Found a wrapped payload — loop again in case it is nested.
        text = inner
    return text