准备工作
操作系统
CentOS Stream 9 (x86)
cat /etc/os-release
NAME="CentOS Stream"
VERSION="9"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="9"
PLATFORM_ID="platform:el9"
PRETTY_NAME="CentOS Stream 9"
ANSI_COLOR="0;31"
LOGO="fedora-logo-icon"
CPE_NAME="cpe:/o:centos:centos:9"
HOME_URL="https://centos.org/"
BUG_REPORT_URL="https://issues.redhat.com/"
REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux 9"
REDHAT_SUPPORT_PRODUCT_VERSION="CentOS Stream"
已安装 docker compose
Docker version 29.5.3, build d1c06ef
Docker Compose version v5.1.4
一、目录结构
gongwen/
├── docker-compose.yml
├── Dockerfile
└── main.py
二、创建配置文件
创建目录
mkdir -p gongwen/ && cd gongwen
部署文件
cat > docker-compose.yml << 'EOF'
# Compose definition for the document-formatting API.
services:
  gongwen-api:
    # Build the image from the Dockerfile in this directory.
    build: .
    container_name: gongwen-api
    restart: unless-stopped
    ports:
      # host:container — matches the port uvicorn listens on.
      - "8765:8765"
EOF
镜像构建文件
cat > Dockerfile << 'EOF'
FROM python:3.11-slim
# Install Node.js (intended for generating Word files with docx.js).
# NOTE(review): the main.py shipped with this guide builds documents with
# python-docx only and never invokes node — confirm Node.js is still needed.
RUN apt-get update && apt-get install -y curl && \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs && \
apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install the docx npm package into /app/node_modules (a local install, so the
# module can still be resolved when scripts run from a temporary directory).
RUN npm install docx
# Install the Python dependencies.
RUN pip install --no-cache-dir fastapi uvicorn pydantic python-docx
COPY main.py .
EXPOSE 8765
# Serve the FastAPI app on all interfaces, on the port exposed above.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8765"]
EOF
部署主程序源码
cat > main.py<< 'EOF'
"""
公文格式化 API 服务(最终版)
接口结构与 v11 完全一致,Word 格式按 GB/T 9704-2012 重写
XML 元素严格按 OOXML CT_PPr schema 顺序插入:
pBdr(8) → tabs(10) → spacing(21) → ind(22) → jc(26)
"""
from fastapi import FastAPI, HTTPException, Body
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Optional, List, Literal, Any
import io, json
from urllib.parse import quote
from lxml import etree
from docx import Document
from docx.shared import Pt, Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
app = FastAPI(title="公文格式化服务")
# ── 数据模型(与 v11 完全一致)──────────────────────────────────────────────────
class BodyItem(BaseModel):
    # One element of the document body.

    # Kind of element. create_gongwen_doc gives "heading1" and "heading2"
    # their own fonts; every other value is rendered as ordinary body text.
    type: Literal["intro", "heading1", "heading2", "heading3", "para"]

    # Text of the element, written into the paragraph as-is.
    text: str

    # Optional ordinal supplied by the caller; not read by the code visible
    # in this file.
    index: Optional[int] = None
class GongwenRequest(BaseModel):
    # Full description of one official document. Every field is optional;
    # create_gongwen_doc skips the elements whose value is empty.

    # Red letterhead text, rendered by _add_redheader.
    doc_redheader: Optional[str] = ""
    # Document number line, centred below the letterhead.
    doc_number: Optional[str] = ""
    # Title, one centred paragraph per entry.
    title_lines: List[str] = []
    # Main recipient; a full-width colon is appended when rendered.
    recipient: Optional[str] = ""
    # Body elements, rendered in order.
    body: List[BodyItem] = []
    # Attachment names; numbered only when there is more than one.
    attachments: List[str] = []
    # Issuing organisation, one right-aligned paragraph per non-blank entry.
    signing_org_lines: List[str] = []
    # Date of the document, right-aligned.
    date: Optional[str] = ""
    # Copy-to organisations for the imprint block.
    cc_orgs: Optional[str] = ""
    # Printing organisation (left side of the imprint line).
    print_org: Optional[str] = ""
    # Printing date (right side of the imprint line, followed by "印发").
    print_date: Optional[str] = ""
    # Urgency marking.
    urgency: Optional[str] = ""
    # Secrecy marking.
    secrecy: Optional[str] = ""
# ── pPr 子元素 schema 顺序(按 OOXML CT_PPr)───────────────────────────────────
_PPR_ORDER = [
    "w:pStyle", "w:keepNext", "w:keepLines", "w:pageBreakBefore", "w:framePr",
    "w:widowControl", "w:numPr", "w:suppressLineNumbers",
    "w:pBdr",
    "w:shd",
    "w:tabs",
    "w:suppressAutoHyphens", "w:kinsoku", "w:wordWrap", "w:overflowPunct",
    "w:topLinePunct", "w:autoSpaceDE", "w:autoSpaceDN", "w:bidi",
    "w:adjustRightInd", "w:snapToGrid",
    "w:spacing",
    "w:ind",
    "w:contextualSpacing", "w:mirrorIndents", "w:suppressOverlap",
    "w:jc",
    "w:textDirection", "w:textAlignment", "w:textboxTightWrap",
    "w:outlineLvl", "w:divId", "w:cnfStyle",
    "w:rPr", "w:sectPr", "w:pPrChange",
]

# Position of every known pPr child, precomputed so a lookup is O(1) instead
# of a linear list.index() per child.
_PPR_RANK = {tag: rank for rank, tag in enumerate(_PPR_ORDER)}


def _ppr_tag(tag):
    """Return *tag* as ``w:<local-name>``, dropping any ``{namespace}`` prefix."""
    return "w:" + tag.rsplit("}", 1)[-1]


def _ppr_insert(pPr, el):
    """Insert *el* into *pPr* at the position required by ``_PPR_ORDER``.

    Args:
        pPr: the paragraph-properties element; it must support iteration,
            ``insert``, ``append`` and ``remove`` (lxml / ElementTree style).
        el: the child element to add.

    Behaviour:
        * A tag that is not listed in ``_PPR_ORDER`` is appended at the end.
        * An existing child with the same tag is replaced, because each pPr
          child may appear at most once.
        * Otherwise *el* goes directly before the first child that must
          follow it; children with unlisted tags are ignored when searching.
    """
    rank = _PPR_RANK.get(_ppr_tag(el.tag))
    if rank is None:
        pPr.append(el)
        return

    # Last write wins: drop any earlier element of the same kind.
    for child in list(pPr):
        if child is not el and child.tag == el.tag:
            pPr.remove(child)

    for pos, child in enumerate(pPr):
        child_rank = _PPR_RANK.get(_ppr_tag(child.tag))
        if child_rank is not None and child_rank > rank:
            pPr.insert(pos, el)
            return
    pPr.append(el)
# ── 格式工具 ──────────────────────────────────────────────────────────────────
def _set_font(run, name: str, size_pt: float, bold=False, color_rgb=None):
    """Apply typeface, size, weight and optional colour to *run*.

    Args:
        run: the python-docx run to format.
        name: font family, used for the regular face and the East Asian face.
        size_pt: font size in points.
        bold: value assigned to ``run.bold``.
        color_rgb: optional ``(r, g, b)`` triple; left untouched when falsy.
    """
    run.bold = bold
    font = run.font
    font.size = Pt(size_pt)
    font.name = name
    # Set w:eastAsia explicitly so CJK text is rendered in the same face.
    run._r.get_or_add_rPr().get_or_add_rFonts().set(qn("w:eastAsia"), name)
    if not color_rgb:
        return
    run.font.color.rgb = RGBColor(*color_rgb)
def _apply_para_fmt(para,
                    line_pt=28, before_pt=0, after_pt=0,
                    align=WD_ALIGN_PARAGRAPH.LEFT,
                    first_line_chars=0,
                    pBdr=None, tabs=None,
                    char_width_pt=16):
    """Write paragraph properties onto *para* in schema order.

    Args:
        para: python-docx paragraph to format.
        line_pt: exact line height in points.
        before_pt: space before the paragraph, in points.
        after_pt: space after the paragraph, in points.
        align: a ``WD_ALIGN_PARAGRAPH`` member; LEFT (or any unmapped value)
            emits no ``w:jc`` element.
        first_line_chars: first-line indent expressed in characters; 0 emits
            no ``w:ind`` element.
        pBdr: optional prebuilt ``w:pBdr`` element.
        tabs: optional prebuilt ``w:tabs`` element.
        char_width_pt: width of one character in points, used to convert
            *first_line_chars* to twips. Defaults to 16, the value this
            function previously hard-coded.
    """
    def twips(points):
        # 1 pt = 20 twips. round() rather than int() so a float product such
        # as 595.9999999 is not truncated to the wrong integer.
        return str(round(points * 20))

    pPr = para._p.get_or_add_pPr()

    if pBdr is not None:
        _ppr_insert(pPr, pBdr)
    if tabs is not None:
        _ppr_insert(pPr, tabs)

    spacing = OxmlElement("w:spacing")
    spacing.set(qn("w:before"), twips(before_pt))
    spacing.set(qn("w:after"), twips(after_pt))
    spacing.set(qn("w:lineRule"), "exact")
    spacing.set(qn("w:line"), twips(line_pt))
    _ppr_insert(pPr, spacing)

    if first_line_chars:
        ind = OxmlElement("w:ind")
        ind.set(qn("w:firstLine"), twips(first_line_chars * char_width_pt))
        _ppr_insert(pPr, ind)

    jc_map = {
        WD_ALIGN_PARAGRAPH.CENTER: "center",
        WD_ALIGN_PARAGRAPH.RIGHT: "right",
        WD_ALIGN_PARAGRAPH.JUSTIFY: "both",
        WD_ALIGN_PARAGRAPH.LEFT: "left",
    }
    jc_val = jc_map.get(align, "left")
    # Left alignment is expressed by omitting w:jc.
    if jc_val != "left":
        jc = OxmlElement("w:jc")
        jc.set(qn("w:val"), jc_val)
        _ppr_insert(pPr, jc)
def _make_pBdr_edge(edge, sz, color):
    """Build a ``w:pBdr`` element holding one single-line border on *edge*.

    Args:
        edge: local name of the border child, e.g. ``"top"`` or ``"bottom"``.
        sz: line width written to ``w:sz``.
        color: hex colour string written to ``w:color``.
    """
    pBdr = OxmlElement("w:pBdr")
    line = OxmlElement(f"w:{edge}")
    line.set(qn("w:val"), "single")
    line.set(qn("w:sz"), str(sz))
    line.set(qn("w:space"), "0")
    line.set(qn("w:color"), color)
    pBdr.append(line)
    return pBdr


def _make_pBdr_bottom(sz, color):
    """Return a ``w:pBdr`` element with a single bottom border line."""
    return _make_pBdr_edge("bottom", sz, color)


def _make_pBdr_top(sz, color):
    """Return a ``w:pBdr`` element with a single top border line."""
    return _make_pBdr_edge("top", sz, color)
def _make_tabs_right(pos_twip):
    """Return a ``w:tabs`` element with one right-aligned stop at *pos_twip*."""
    stop = OxmlElement("w:tab")
    stop.set(qn("w:val"), "right")
    stop.set(qn("w:pos"), str(pos_twip))

    container = OxmlElement("w:tabs")
    container.append(stop)
    return container
def _add_redheader(doc: Document, text: str) -> None:
    """Add the red letterhead line, squeezed onto a single line.

    The text is always set at 50 pt. It is made to fit by horizontal
    character scaling, which Word stores as ``<w:w w:val="N"/>`` (an integer
    percentage, 100 = unscaled).

    Sizing: the type area is 15.6 cm wide and a 50 pt full-width glyph is
    about 17.65 mm, so roughly 8.8 glyphs fit at 100 %. The scale is therefore
    ``min(100, 880 // len(text))``, floored at 1 so that very long input can
    never produce a zero-width scale.

    ``w:w`` must sit at its schema position inside ``w:rPr`` (after
    ``w:spacing``, before ``w:kern``), so it is inserted by rank rather than
    appended.

    Args:
        doc: python-docx document to append to.
        text: letterhead text; nothing is added when it is empty or blank.
    """
    text = (text or "").strip()
    if not text:
        return

    scale_pct = max(1, min(100, 880 // len(text)))
    face = "方正小标宋简体"

    # Paragraph: exact 58 pt line height = 50 pt glyphs + 8 pt of air.
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    fmt = p.paragraph_format
    fmt.space_before = Pt(0)
    fmt.space_after = Pt(20)
    fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    fmt.line_spacing = Pt(58)

    # Run: fixed 50 pt, bold, red.
    run = p.add_run(text)
    run.font.size = Pt(50)
    run.font.bold = True
    run.font.name = face
    run.font.color.rgb = RGBColor(0xFF, 0x00, 0x00)

    rPr = run._r.get_or_add_rPr()
    rFonts = rPr.get_or_add_rFonts()
    for attr in ("w:eastAsia", "w:ascii", "w:hAnsi"):
        rFonts.set(qn(attr), face)

    # Full child order of w:rPr. Listing the elements that follow w:szCs too
    # keeps w:w ahead of them if any are present.
    rpr_order = (
        "w:rStyle", "w:rFonts", "w:b", "w:bCs", "w:i", "w:iCs",
        "w:caps", "w:smallCaps", "w:strike", "w:dstrike", "w:outline",
        "w:shadow", "w:emboss", "w:imprint", "w:noProof", "w:snapToGrid",
        "w:vanish", "w:webHidden", "w:color", "w:spacing", "w:w",
        "w:kern", "w:position", "w:sz", "w:szCs", "w:highlight", "w:u",
        "w:effect", "w:bdr", "w:shd", "w:fitText", "w:vertAlign", "w:rtl",
        "w:cs", "w:em", "w:lang", "w:eastAsianLayout", "w:specVanish",
        "w:oMath",
    )
    rank = {tag: pos for pos, tag in enumerate(rpr_order)}
    w_rank = rank["w:w"]

    w_el = OxmlElement("w:w")
    w_el.set(qn("w:val"), str(scale_pct))

    # Insert before the first existing child that must come after w:w.
    children = list(rPr)
    insert_pos = len(children)
    for pos, child in enumerate(children):
        child_rank = rank.get("w:" + child.tag.rsplit("}", 1)[-1])
        if child_rank is not None and child_rank > w_rank:
            insert_pos = pos
            break
    rPr.insert(insert_pos, w_el)
def _add_para(doc, text, font, size_pt,
              align=WD_ALIGN_PARAGRAPH.LEFT,
              bold=False, color_rgb=None,
              line_pt=28, before_pt=0, after_pt=0,
              first_line_chars=0,
              pBdr=None, tabs=None):
    """Append a one-run paragraph to *doc* and return it.

    Paragraph-level arguments are forwarded to ``_apply_para_fmt``; the
    run-level ones (font, size_pt, bold, color_rgb) to ``_set_font``.
    """
    para = doc.add_paragraph()
    _apply_para_fmt(
        para,
        line_pt=line_pt,
        before_pt=before_pt,
        after_pt=after_pt,
        align=align,
        first_line_chars=first_line_chars,
        pBdr=pBdr,
        tabs=tabs,
    )
    _set_font(para.add_run(text), font, size_pt, bold, color_rgb)
    return para
def _add_empty(doc, line_pt=28):
    """Append a blank paragraph with an exact line height of *line_pt* points."""
    blank = doc.add_paragraph()
    _apply_para_fmt(blank, line_pt, 0, 0)
# ── 核心:生成公文 ─────────────────────────────────────────────────────────────
def create_gongwen_doc(data: GongwenRequest) -> bytes:
    """Render *data* as a formatted official document and return .docx bytes.

    Elements are emitted top to bottom: letterhead, secrecy, urgency,
    document number, red rule, title, recipient, body, attachments, issuing
    organisation, date and finally the imprint block. Optional elements are
    skipped when their field is empty.

    Args:
        data: the validated request model.

    Returns:
        The saved document, passed through ``_fix_zoom``.
    """
    fangsong = "仿宋_GB2312"
    centered = WD_ALIGN_PARAGRAPH.CENTER
    justified = WD_ALIGN_PARAGRAPH.JUSTIFY
    flush_right = WD_ALIGN_PARAGRAPH.RIGHT

    doc = Document()

    # A4 page; the margins leave a type area 21.0 - 2.8 - 2.6 = 15.6 cm wide.
    sec = doc.sections[0]
    sec.page_width = Cm(21.0)
    sec.page_height = Cm(29.7)
    sec.top_margin = Cm(3.7)
    sec.bottom_margin = Cm(3.5)
    sec.left_margin = Cm(2.8)
    sec.right_margin = Cm(2.6)

    normal_fmt = doc.styles["Normal"].paragraph_format
    normal_fmt.space_before = Pt(0)
    normal_fmt.space_after = Pt(0)

    # 0. Red letterhead.
    if data.doc_redheader:
        _add_redheader(doc, data.doc_redheader)

    # 1-3. Secrecy, urgency and document number, each centred.
    for value, size_pt in ((data.secrecy, 14),
                           (data.urgency, 14),
                           (data.doc_number, 16)):
        if value:
            _add_para(doc, value, fangsong, size_pt, align=centered)

    # 4. Red rule under the header: an empty paragraph with a bottom border.
    _add_para(doc, "", fangsong, 14,
              line_pt=4, before_pt=4, after_pt=4,
              pBdr=_make_pBdr_bottom(18, "FF0000"))

    # 5. Title: one blank line below the rule, 35 pt line height.
    _add_empty(doc, line_pt=35)
    for line in data.title_lines:
        _add_para(doc, line, "方正小标宋简体", 22,
                  align=centered, line_pt=35)

    # 6. Gap between title and body.
    _add_empty(doc, line_pt=29.8)

    # 7. Main recipient.
    if data.recipient:
        _add_para(doc, data.recipient + ":", fangsong, 16)

    # 8. Body: only the two top heading levels use their own typeface.
    heading_fonts = {"heading1": "黑体", "heading2": "楷体"}
    for item in data.body:
        _add_para(doc, item.text, heading_fonts.get(item.type, fangsong), 16,
                  align=justified, first_line_chars=2)

    # 9. Attachments: numbered only when there is more than one.
    if data.attachments:
        _add_empty(doc)
        single = len(data.attachments) == 1
        for number, att in enumerate(data.attachments, start=1):
            prefix = "附件:" if single else f"附件{number}:"
            _add_para(doc, prefix + att, fangsong, 16)

    # 10. Three blank lines before the signature.
    for _ in range(3):
        _add_empty(doc)

    # 11. Issuing organisation.
    for line in data.signing_org_lines:
        if line.strip():
            _add_para(doc, line, fangsong, 16, align=flush_right)

    # 12. Date of the document.
    if data.date:
        _add_para(doc, data.date, fangsong, 16,
                  align=flush_right, after_pt=8)

    # 13. Imprint block. The gate includes print_date so that a request
    # carrying only a print date still gets its print line; previously the
    # outer test ignored print_date and the line was silently dropped.
    has_print_line = bool(data.print_org or data.print_date)
    if data.cc_orgs or has_print_line:
        _add_para(doc, "", fangsong, 14,
                  line_pt=6,
                  pBdr=_make_pBdr_bottom(6, "000000"))
        if data.cc_orgs:
            _add_para(doc, "抄送:" + data.cc_orgs, fangsong, 14,
                      line_pt=22)
        _add_para(doc, "", fangsong, 14,
                  line_pt=6,
                  pBdr=_make_pBdr_top(6, "000000"))
        if has_print_line:
            # Organisation on the left, date pushed to a right tab stop.
            p = doc.add_paragraph()
            _apply_para_fmt(p, line_pt=20,
                            tabs=_make_tabs_right(8827))
            r_left = p.add_run(data.print_org or "")
            _set_font(r_left, fangsong, 14)
            p.add_run("\t")
            r_right = p.add_run((data.print_date or "") + "印发")
            _set_font(r_right, fangsong, 14)

    buf = io.BytesIO()
    doc.save(buf)
    return _fix_zoom(buf.getvalue())
# ── settings.xml zoom 修复 ────────────────────────────────────────────────────
import zipfile, re as _re
# Matches a self-closing <w:zoom .../> whose attributes do not mention
# "percent"; group 1 captures the existing attributes.
_ZOOM_WITHOUT_PERCENT = _re.compile(r'<w:zoom((?:(?!percent)[^/])*?)/>')


def _fix_zoom(docx_bytes: bytes) -> bytes:
    """Return a copy of the .docx in which ``w:zoom`` always has ``w:percent``.

    The archive is rewritten member by member. Only ``word/settings.xml`` is
    modified: every ``<w:zoom .../>`` lacking a percent attribute gets
    ``w:percent="100"`` appended. All other members are copied unchanged and
    in the same order.

    Args:
        docx_bytes: the complete .docx (zip) file.

    Returns:
        The rewritten archive as bytes.
    """
    buf_out = io.BytesIO()
    with zipfile.ZipFile(io.BytesIO(docx_bytes), 'r') as zin, \
            zipfile.ZipFile(buf_out, 'w', zipfile.ZIP_DEFLATED) as zout:
        for item in zin.infolist():
            data = zin.read(item.filename)
            if item.filename == 'word/settings.xml':
                text = _ZOOM_WITHOUT_PERCENT.sub(
                    r'<w:zoom\1 w:percent="100"/>',
                    data.decode('utf-8'),
                )
                data = text.encode('utf-8')
            # Passing the ZipInfo keeps the member's name and metadata.
            zout.writestr(item, data)
    return buf_out.getvalue()
# ── 接口(与 v11 完全一致)───────────────────────────────────────────────────────
@app.post("/generate_from_llm_text")
async def generate_from_llm_text(request_data: Any = Body(...)):
    """Build a Word document from LLM-produced JSON and return it as a download.

    Two request shapes are accepted:
      1. ``{"llm_output": {...}}`` or ``{"llm_output": "<json string>"}``
         (the wrapper commonly produced by Dify);
      2. ``{...}`` — the document object itself.

    Raises:
        HTTPException 400: ``llm_output`` is not valid JSON, the payload is
            not a JSON object, or it fails ``GongwenRequest`` validation.
        HTTPException 500: any other failure while building the document.
    """
    try:
        content = request_data
        if isinstance(content, dict) and "llm_output" in content:
            content = content["llm_output"]
            if isinstance(content, str):
                try:
                    content = json.loads(content)
                except json.JSONDecodeError as exc:
                    raise HTTPException(
                        status_code=400,
                        detail="llm_output 内容不是有效的 JSON 字符串",
                    ) from exc

        # Reject lists, strings and numbers explicitly instead of relying on
        # the TypeError that ``**content`` would raise.
        if not isinstance(content, dict):
            raise HTTPException(
                status_code=400,
                detail="数据格式校验失败: 请求体必须是 JSON 对象",
            )

        try:
            gongwen_data = GongwenRequest(**content)
        except Exception as exc:
            raise HTTPException(
                status_code=400, detail=f"数据格式校验失败: {exc}"
            ) from exc

        # File name: first title line, path separators neutralised, at most
        # 20 characters. Fall back to the default when that leaves nothing.
        first_title = gongwen_data.title_lines[0] if gongwen_data.title_lines else ""
        safe_title = first_title.replace("/", "_").replace("\\", "_")[:20]
        if not safe_title.strip():
            safe_title = "公文"
        filename = f"{safe_title}.docx"

        doc_bytes = create_gongwen_doc(gongwen_data)

        # RFC 5987 form so non-ASCII file names survive the header.
        encoded_filename = quote(filename, encoding="utf-8")
        return StreamingResponse(
            io.BytesIO(doc_bytes),
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
            },
        )
    except HTTPException:
        # Deliberate client errors pass through untouched.
        raise
    except Exception as exc:
        import traceback
        traceback.print_exc()
        raise HTTPException(
            status_code=500, detail=f"服务器内部错误: {exc}"
        ) from exc
import base64
class Base64DocxRequest(BaseModel):
    # Request body of the /base64_to_docx endpoint.

    # Base64 text of the .docx; a leading "data:...," prefix is tolerated.
    base64_content: str
    # Optional download file name.
    filename: Optional[str] = "公文.docx"
class Base64DocxResponse(BaseModel):
    # JSON description of a decoded document.
    # NOTE(review): no endpoint visible in this file returns this model —
    # confirm whether it is still used.

    filename: str
    content_type: str
    # The decoded file re-encoded as base64 (or the input passed through).
    base64_content: str
    size_bytes: int
@app.post("/base64_to_docx")
async def base64_to_docx(req: Base64DocxRequest):
    """Decode a base64-encoded .docx and return it as a binary download.

    Intended for consumers that need the raw file, such as Dify's document
    extractor. A ``data:<mime>;base64,`` prefix is stripped before decoding.

    Raises:
        HTTPException 400: the text is not valid base64, or the decoded bytes
            do not start with the zip signature ``PK``.
        HTTPException 500: any other failure.
    """
    try:
        b64 = req.base64_content.strip()
        if b64.startswith("data:") and "," in b64:
            b64 = b64.split(",", 1)[1]

        try:
            file_bytes = base64.b64decode(b64)
        except ValueError as exc:
            # binascii.Error (bad padding) is a ValueError subclass; non-ASCII
            # input raises ValueError directly.
            raise HTTPException(status_code=400, detail="base64 解码失败") from exc

        # A .docx is a zip archive, so it must begin with the zip signature.
        if not file_bytes.startswith(b"PK"):
            raise HTTPException(status_code=400, detail="不是有效的 docx 文件")

        filename = req.filename or "公文.docx"
        # safe="" also percent-encodes "/", which quote() leaves alone by
        # default; the other endpoint removes path separators as well.
        encoded_filename = quote(filename, safe="", encoding="utf-8")
        return StreamingResponse(
            io.BytesIO(file_bytes),
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
            },
        )
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"服务器内部错误: {exc}"
        ) from exc
if __name__ == "__main__":
    # Running the file directly starts the same server as the Dockerfile CMD.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8765)
EOF
三、公文 API
开始构建部署FastAPI程序
docker compose up -d
四、测试chat
用以下指令测试业务
# Smoke test. -f makes curl exit non-zero on an HTTP 4xx/5xx response, so the failure branch also fires when the server rejects the request.
curl -sf -o test.docx -w "HTTP状态码: %{http_code}\n文件大小: %{size_download} bytes\n" -X POST http://localhost:8765/generate_from_llm_text -H "Content-Type: application/json" -d '{"doc_redheader":"测试机关文件","doc_number":"测发〔2026〕1号","title_lines":["关于测试公文格式化服务的通知"],"recipient":"各测试单位","body":[{"type":"intro","text":"为验证公文格式化API服务是否正常,现通知如下:"},{"type":"heading1","text":"一、测试内容"},{"type":"para","text":"本次测试涵盖红头、标题、正文、附件及版记等全部要素。"},{"type":"heading2","text":"(一)格式校验"},{"type":"para","text":"请检查生成文档的字体、字号、行距及页边距是否符合GB/T 9704-2012标准。"}],"attachments":["测试附件清单"],"signing_org_lines":["测试机关"],"date":"2026年7月2日","cc_orgs":"抄送测试部门","print_org":"测试机关办公室","print_date":"2026年7月2日"}' && echo "✅ 文件已保存为 test.docx,请用Word或WPS打开验证排版" || echo "❌ 请求失败"