Python: Word 文檔章節操作完整指南：從理解結構到精確刪除 ; 段落索引 vs body索引; doc.paragraphs vs doc.element.body; 「整份文件，是由一串段落和表格依序排成的；最後用 sectPr 標記這個 section 的格式」

📝 建立測試文檔

首先，讓我們創建一個結構完整的測試文檔，包含標題、段落和表格：

from docx import Document
from docx.shared import Inches
from docx.enum.style import WD_STYLE_TYPE

# Create a new document
doc = Document()

# Add the document title (heading level 0)
doc.add_heading('技術文件：產品測試規範', 0)

# Chapter 1
doc.add_heading('1. 概述', 1)
doc.add_paragraph('本文檔定義了產品測試的標準流程和規範。')
doc.add_paragraph('適用於所有硬體產品的測試驗證。')

# Table for chapter 1 (3 rows x 3 columns)
table1 = doc.add_table(rows=3, cols=3)
table1.style = 'Light List Accent 1'
# Fill in the table: header row first, then the two data rows
header_cells = table1.rows[0].cells
header_cells[0].text = '測試項目'
header_cells[1].text = '標準'
header_cells[2].text = '負責人'

table1.rows[1].cells[0].text = '功能測試'
table1.rows[1].cells[1].text = 'IEC 60950'
table1.rows[1].cells[2].text = '張三'

table1.rows[2].cells[0].text = '性能測試'
table1.rows[2].cells[1].text = 'ISO 9001'
table1.rows[2].cells[2].text = '李四'

# Chapter 2
doc.add_heading('2. PCBA測試計畫', 1)
doc.add_paragraph('印刷電路板組裝(PCBA)的測試流程包括以下步驟：')
doc.add_paragraph('1) 外觀檢查\n2) 電氣測試\n3) 功能驗證')

# Table for chapter 2 (4 rows x 2 columns)
table2 = doc.add_table(rows=4, cols=2)
table2.style = 'Light Grid Accent 1'
table2.rows[0].cells[0].text = '測試階段'
table2.rows[0].cells[1].text = '所需時間'
table2.rows[1].cells[0].text = 'ICT測試'
table2.rows[1].cells[1].text = '5分鐘'
table2.rows[2].cells[0].text = '功能測試'
table2.rows[2].cells[1].text = '15分鐘'
table2.rows[3].cells[0].text = '燒機測試'
table2.rows[3].cells[1].text = '24小時'

# A paragraph placed after the table, still before the chapter 3 heading
doc.add_paragraph('測試完成後需要生成測試報告。')

# Chapter 3
doc.add_heading('3. 環境測試要求', 1)
doc.add_paragraph('所有產品必須通過以下環境測試：')
doc.add_paragraph('• 高低溫測試：-40°C 至 +85°C')
doc.add_paragraph('• 濕度測試：95% RH')
doc.add_paragraph('• 振動測試：10-500Hz')

# Chapter 4 (contains sub-sections at heading level 2)
doc.add_heading('4. 品質標準', 1)
doc.add_paragraph('本章定義產品品質標準。')

doc.add_heading('4.1 外觀標準', 2)
doc.add_paragraph('產品外觀不得有明顯瑕疵。')

doc.add_heading('4.2 電氣標準', 2)
doc.add_paragraph('所有電氣參數必須符合規格書要求。')

# Chapter 5
doc.add_heading('5. PCBA返修流程', 1)
doc.add_paragraph('當PCBA測試失敗時，需要進行返修。')
doc.add_paragraph('返修流程包括：故障分析、維修、重新測試。')

# Save the document
doc.save(r'D:\Temp\test_document.docx')
print("✅ 測試文檔已創建：test_document.docx")

生成的 test_document.docx:

📊 視覺化文檔結構

讓我們先看看文檔的內部結構：

def visualize_document_structure(doc, max_elements=50):
    """
    Print one line per direct child of the document body, in document order.

    Paragraphs are labelled by style name ('Heading 1', 'Heading 2', 'Title',
    or plain paragraph) with their text truncated to 50 characters; tables
    and any other element type get their own marker. The bracketed number
    is the element's index within the body.

    Args:
        doc: python-docx document whose ``element.body`` is inspected.
        max_elements: Upper bound used to slice the list of body children
            before printing.

    Returns:
        None. The report is written to stdout.
    """
    from docx.oxml.ns import qn
    from docx.text.paragraph import Paragraph

    # Compare fully qualified names instead of tag suffixes, so only genuine
    # w:p / w:tbl elements are classified as paragraph / table.
    w_p = qn('w:p')
    w_tbl = qn('w:tbl')

    body = doc.element.body
    print(f"\n📋 文檔結構分析（共 {len(body)} 個元素）")
    print("=" * 80)

    for i, elem in enumerate(list(body)[:max_elements]):
        if elem.tag == w_p:
            # `doc` (not `body`) is passed as the parent so that
            # para.style can be resolved.
            para = Paragraph(elem, doc)
            style = para.style.name
            full_text = para.text
            text = full_text[:50] + "..." if len(full_text) > 50 else full_text

            # Use a different marker for each heading level
            if style == 'Heading 1':
                print(f"[{i:3d}] 📌 H1: {text}")
            elif style == 'Heading 2':
                print(f"[{i:3d}]   📎 H2: {text}")
            elif style == 'Title':
                print(f"[{i:3d}] 📑 標題: {text}")
            else:
                print(f"[{i:3d}]   段落: {text}")

        elif elem.tag == w_tbl:
            print(f"[{i:3d}] 📊 【表格】")
        else:
            # Show only the local part of a '{namespace}local' tag
            tag_name = elem.tag.split('}')[-1] if '}' in elem.tag else elem.tag
            print(f"[{i:3d}] ❓ 其他: {tag_name}")

# Load the saved test document and print its body structure
doc = Document(r'D:\Temp\test_document.docx')
visualize_document_structure(doc)

輸出:

# Materialize the direct children of the body element into a plain list
body_children = list(body)

目前使用elem.tag分辨是段落還是表格,
其他方法:

切片到僅有一個段落,一個表格:

code:

def comprehensive_comparison(doc):
    """
    Classify every body child with three techniques and report disagreements.

    Each technique labels a body child as 'paragraph', 'table' or 'other':
    the suffix of ``elem.tag``, the class name of the element, and an
    ``isinstance`` check against CT_P / CT_Tbl. The function prints whether
    the three label lists are identical and, if not, every index at which
    they differ.

    Args:
        doc: python-docx document whose ``element.body`` is inspected.
    """
    from docx.oxml.text.paragraph import CT_P
    from docx.oxml.table import CT_Tbl

    def by_tag_suffix(node):
        # Technique 1: suffix of the qualified tag name
        if node.tag.endswith('p'):
            return 'paragraph'
        if node.tag.endswith('tbl'):
            return 'table'
        return 'other'

    def by_class_name(node):
        # Technique 2: name of the element's concrete class
        class_name = type(node).__name__
        if class_name == 'CT_P':
            return 'paragraph'
        if class_name == 'CT_Tbl':
            return 'table'
        return 'other'

    def by_isinstance(node):
        # Technique 3: isinstance against the python-docx element classes
        if isinstance(node, CT_P):
            return 'paragraph'
        if isinstance(node, CT_Tbl):
            return 'table'
        return 'other'

    children = doc.element.body

    print("🔍 三種方法的比較：\n")

    suffix_labels = [by_tag_suffix(node) for node in children]
    class_labels = [by_class_name(node) for node in children]
    instance_labels = [by_isinstance(node) for node in children]

    # Compare the three label lists
    all_same = suffix_labels == class_labels == instance_labels
    print(f"三種方法結果完全一致: {all_same}")

    if all_same:
        return

    print("\n差異分析：")
    triples = zip(suffix_labels, class_labels, instance_labels)
    for i, (r1, r2, r3) in enumerate(triples):
        if not (r1 == r2 == r3):
            print(f"  索引 {i}: endswith={r1}, type={r2}, isinstance={r3}")

# Run the three-way comparison on the document loaded earlier
comprehensive_comparison(doc)

輸出:

使用 '<w:p>' in repr(b) 或
'CT_P' in repr(b) #可用但不推薦:

「整份文件，是由一串段落和表格依序排成的；
最後用 sectPr 標記這個 section 的格式」。

建議：

🥇 type(elem).__name__ == 'CT_P' —— 可讀性與效率的折衷方案
🥈 elem.tag.endswith('p') —— 寫法最簡單，速度也不錯
🥉 isinstance(elem, CT_P) —— 最精確，但需要先匯入 CT_P 類別
🎯 'CT_P' in repr(elem) —— 能用但不建議：
效能較差：必須先建立完整字串再搜尋
不夠穩定：過度依賴內部實作細節，未來版本可能改變
✅ 使用 qn：
from docx.oxml.ns import qn;
elem.tag == qn('w:tbl')#表格
elem.tag == qn('w:p')#段落
——
以 XML QName 比對標籤，語義最清楚，
也最不受命名空間實作影響,最貼近 XML 規範

🔍 理解段落索引 vs Body索引的差異

這是理解文檔操作的關鍵：

def analyze_index_mapping(doc):
    """
    Print how paragraph indices map onto body indices.

    Walks the direct children of the document body. Each ``w:p`` element is
    printed with its position in ``doc.paragraphs`` (P[n]) and its position
    in the body (B[n]); each ``w:tbl`` element is printed with a body index
    only, because tables do not consume a paragraph index. Other element
    types are not listed. A summary of paragraph, element and table counts
    follows.

    Args:
        doc: python-docx document (uses ``element.body``, ``paragraphs``
            and ``tables``).

    Returns:
        None. The report is written to stdout.
    """
    # Exact qualified names of w:p and w:tbl. A suffix test such as
    # tag.endswith('p') would also accept any other tag ending in "p",
    # which would push the paragraph counter out of step.
    w_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    w_p = w_ns + "p"
    w_tbl = w_ns + "tbl"

    body_children = list(doc.element.body)
    # python-docx builds a fresh list on every `doc.paragraphs` access, so
    # read it once instead of once per loop iteration.
    paragraphs = doc.paragraphs

    print("\n🔗 索引對應關係分析")
    print("=" * 60)
    print("段落索引 → Body索引 | 內容")
    print("-" * 60)

    para_idx = 0
    for body_idx, elem in enumerate(body_children):
        if elem.tag == w_p:
            full_text = paragraphs[para_idx].text
            text = full_text[:40] + "..." if len(full_text) > 40 else full_text
            print(f"P[{para_idx:2d}] → B[{body_idx:2d}] | {text}")
            para_idx += 1
        elif elem.tag == w_tbl:
            print(f"      → B[{body_idx:2d}] | 📊 表格（不佔用段落索引）")

    print("\n📈 統計：")
    print(f"   總段落數：{len(paragraphs)}")
    print(f"   總元素數：{len(body_children)}")
    print(f"   表格數量：{len(doc.tables)}")

# Print the paragraph-index / body-index mapping for the loaded document
analyze_index_mapping(doc)

輸出結果:

段落/表格以外,
其他可能遇到的兄弟元素:

# Element types commonly found as children of a Word document body,
# keyed by local tag name (without namespace) and mapped to a description.
COMMON_BODY_ELEMENTS = dict((
    ('p', '段落'),
    ('tbl', '表格'),
    ('bookmarkStart', '書籤開始'),
    ('bookmarkEnd', '書籤結束'),
    ('sectPr', '節屬性（頁面設置）'),
    ('sdt', '結構化文檔標籤（內容控制項）'),
    ('customXml', '自訂 XML'),
    ('altChunk', '替代內容塊'),
    ('ins', '插入修訂（追蹤修訂）'),
    ('del', '刪除修訂（追蹤修訂）'),
))

🎯 查找章節邊界（核心功能）

實現一個完整的章節邊界查找功能：

#from docx.text.paragraph import Paragraph
def find_heading_boundaries(doc, target_text="", style_name="Heading 1",
                          exact=False, case_sensitive=False):
    """
    Find the body-index range of every section opened by a heading.

    A section starts at a paragraph whose style name equals ``style_name``
    and runs up to (not including) the next such paragraph, or to the end
    of the body for the last section. Indices are positions in
    ``doc.element.body``, so tables and other non-paragraph elements are
    counted.

    Args:
        doc: python-docx document.
        target_text: Text to look for in the heading. If empty (falsy),
            every section is returned.
        style_name: Style name that marks a section heading.
        exact: If True the stripped heading text must equal the stripped
            ``target_text``; otherwise a substring match is enough.
        case_sensitive: If False both sides are lower-cased before
            comparing.

    Returns:
        list[dict]: one dict per matching section, in document order, with
        the keys ``heading_text``, ``body_start`` (index of the heading),
        ``body_end`` (exclusive end index), ``tables`` and ``paragraphs``
        (body indices; ``paragraphs`` includes the heading itself),
        ``table_count`` and ``paragraph_count``.
    """
    from docx.oxml.ns import qn
    from docx.text.paragraph import Paragraph

    # Exact qualified names; a suffix test such as tag.endswith('p') would
    # also accept any other tag that merely ends in "p".
    w_p = qn('w:p')
    w_tbl = qn('w:tbl')

    # All direct children of the body (paragraphs, tables, ...)
    body_children = list(doc.element.body)

    def norm(s):
        # Strip always; lower-case only for case-insensitive comparison.
        s = s.strip()
        return s if case_sensitive else s.lower()

    search_key = norm(target_text) if target_text else ""

    def matches_search(text):
        # `text` is the raw heading text, e.g. '9\tL10 Test Plan'.
        normalized_text = norm(text)
        if exact:
            return normalized_text == search_key
        # Substring match: 'l10 test plan' matches '9\tl10 test plan'.
        return search_key in normalized_text

    def wanted(section):
        return not target_text or matches_search(section['heading_text'])

    sections = []           # matching sections, in document order
    current_section = None  # section currently being collected

    for body_idx, elem in enumerate(body_children):
        if elem.tag == w_p:
            # The parent must be `doc`, not `body`: CT_Body has no `part`
            # attribute, so para.style would fail with
            # "'CT_Body' object has no attribute 'part'". Constructing a
            # Paragraph by hand is not official API, but it is what lets us
            # walk the body in order and still read the style.
            para = Paragraph(elem, doc)

            if para.style.name == style_name:
                # A new heading closes the previous section.
                if current_section is not None:
                    current_section['body_end'] = body_idx
                    if wanted(current_section):
                        sections.append(current_section)

                current_section = {
                    'heading_text': para.text,
                    'body_start': body_idx,    # includes the heading itself
                    'body_end': None,          # filled in when the section closes
                    'tables': [],              # body indices of tables
                    'paragraphs': [body_idx],  # body indices of paragraphs
                }
            elif current_section is not None:
                # Ordinary paragraph inside the current section
                current_section['paragraphs'].append(body_idx)

        elif elem.tag == w_tbl:
            if current_section is not None:
                current_section['tables'].append(body_idx)

    # The last section runs to the end of the body.
    if current_section is not None:
        current_section['body_end'] = len(body_children)
        if wanted(current_section):
            sections.append(current_section)

    # Attach summary counts
    for section in sections:
        section['table_count'] = len(section['tables'])
        section['paragraph_count'] = len(section['paragraphs'])

    return sections

# Exercise the lookup: list every section
print("\n🔍 查找所有章節：")
all_sections = find_heading_boundaries(doc)
for s in all_sections:
    print(f"\n📍 {s['heading_text']}")
    print(f"   範圍: body[{s['body_start']}:{s['body_end']}]")
    print(f"   包含: {s['paragraph_count']} 個段落, {s['table_count']} 個表格")

# Look up specific sections: those whose heading contains 'PCBA'
print("\n🔍 查找包含 'PCBA' 的章節：")
pcba_sections = find_heading_boundaries(doc, "PCBA")
for s in pcba_sections:
    print(f"   找到: {s['heading_text']}")

"""find_heading_boundaries(doc)
[{'heading_text': '1\tProduct Feature Summary',
  'body_start': 31,
  'body_end': 33,
  'tables': [],
  'paragraphs': [31, 32],
  'table_count': 0,
  'paragraph_count': 2},
   {'heading_text': '2\tPCBA (L6) Test Plan of Record',
  'body_start': 33,
  'body_end': 52,
  'tables': [35],
  'paragraphs': [33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 46, 47, 48, 50, 51],
  'table_count': 1,
  'paragraph_count': 15},...
"""

輸出:

Paragraph(elem, doc) 的作用是：

elem：段落的 XML 元素（CT_P 對象）
doc：作為這個段落的父容器（不要傳入 body）
#使用 doc 當成父容器才有完整的屬性可以使用；傳入 body (CT_Body) 時沒有 part 屬性，訪問 para.style 會報錯

這個構造方式讓你能夠：

將底層的 XML 元素轉換為高級的 Paragraph 對象
保留文檔的完整結構和順序
處理 doc.paragraphs 無法處理的特殊情況
#doc.paragraphs 無法獲取所有段落順序(跳過表格…)

from docx.oxml.ns import qn
# Qualified tag names to compare against elem.tag when classifying
# an element by type.
W_P = qn('w:p')  # paragraph
# -> '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p'

W_TBL = qn('w:tbl')  # table
# -> '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}tbl'

W_SECTPR = qn('w:sectPr')  # section properties
# -> '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}sectPr'

def _enumerate_body(doc: Document):
    """Return the paragraphs and tables of the body in document order.

    Each entry is a tuple ``(index, kind, obj)`` where ``index`` is the
    element's position among the body's children, ``kind`` is
    ``'paragraph'`` or ``'table'``, and ``obj`` is the corresponding
    ``Paragraph`` or ``Table`` wrapper.

    Elements are classified by comparing the underlying XML tag with
    W_P / W_TBL; anything else (e.g. sectPr) is skipped, but still
    consumes an index.
    """
    from docx.text.paragraph import Paragraph
    from docx.table import Table

    # tag -> (kind label, wrapper class)
    wrappers = {
        W_P: ('paragraph', Paragraph),
        W_TBL: ('table', Table),
    }

    found = []
    for position, child in enumerate(doc.element.body.iterchildren()):
        entry = wrappers.get(child.tag)
        if entry is None:
            # Not a paragraph or table: ignore it
            continue
        kind, wrap = entry
        found.append((position, kind, wrap(child, doc)))
    return found

🗑️ 刪除章節（最終實現）

def delete_sections_by_heading(doc, remove_texts, 
                              case_sensitive=False, 
                              exact=False,
                              keep_heading=False,
                              dry_run=False):
    """
    Delete every 'Heading 1' section whose heading matches ``remove_texts``.

    Sections are located with ``find_heading_boundaries`` (the variant that
    returns dicts with ``body_start`` / ``body_end`` / ``heading_text`` /
    ``paragraph_count`` / ``table_count``) and removed from the document
    body in place, last section first so earlier indices stay valid.

    Args:
        doc: python-docx document; modified in place unless ``dry_run``.
        remove_texts: A search string or a list of search strings.
        case_sensitive: Forwarded to ``find_heading_boundaries``.
        exact: Forwarded to ``find_heading_boundaries``.
        keep_heading: If True the heading paragraph itself is kept and only
            the content below it is removed.
        dry_run: If True only print what would be deleted.

    Returns:
        The same ``doc`` object that was passed in.
    """
    if not remove_texts:
        print("❌ 沒有指定要刪除的章節")
        return doc

    # Accept a single string as well as a list of strings
    if isinstance(remove_texts, str):
        remove_texts = [remove_texts]

    # Collect the matches for every search text. A section matched by more
    # than one text is kept once, keyed by its start index; dict insertion
    # order preserves first-seen order.
    targets_by_start = {}
    for text in remove_texts:
        matches = find_heading_boundaries(doc, text, "Heading 1",
                                          exact, case_sensitive)
        for match in matches:
            targets_by_start.setdefault(match['body_start'], match)
    unique_targets = list(targets_by_start.values())

    if not unique_targets:
        print("❌ 沒有找到匹配的章節")
        return doc

    print(f"\n🎯 找到 {len(unique_targets)} 個要刪除的章節")

    if dry_run:
        print("\n📋 預覽模式 - 將會刪除以下章節：")
        for section in unique_targets:
            print(f"\n   章節: {section['heading_text']}")
            print(f"   範圍: body[{section['body_start']}:{section['body_end']}]")
            print(f"   內容: {section['paragraph_count']} 段落, {section['table_count']} 表格")
            if keep_heading:
                print("   (保留標題)")
        return doc

    # Qualified tag names used to classify the removed elements
    w_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    w_p = w_ns + "p"
    w_tbl = w_ns + "tbl"
    w_sect_pr = w_ns + "sectPr"

    body = doc.element.body

    # Delete from the back so earlier body indices are not shifted
    ordered = sorted(unique_targets, key=lambda s: s['body_start'], reverse=True)
    for section in ordered:
        print(f"\n🗑️ 正在刪除: {section['heading_text']}")

        start = section['body_start'] + (1 if keep_heading else 0)
        end = section['body_end']

        # Snapshot the elements first, because removing shifts indices.
        # The last section's range runs to the end of the body and so
        # covers the trailing w:sectPr (section/page settings); that
        # element must stay in the document.
        elements_to_remove = [
            elem for elem in list(body)[start:end]
            if elem.tag != w_sect_pr
        ]

        removed = {'paragraphs': 0, 'tables': 0}
        removed_other = 0
        for elem in elements_to_remove:
            try:
                body.remove(elem)
            except ValueError:
                # lxml raises ValueError when elem is no longer a child of
                # body; report it instead of hiding it, then carry on.
                print(f"   ⚠️ 無法刪除元素（已不在 body 中）: {elem.tag}")
                continue
            if elem.tag == w_tbl:
                removed['tables'] += 1
            elif elem.tag == w_p:
                removed['paragraphs'] += 1
            else:
                removed_other += 1

        print(f"   ✅ 已刪除: {removed['paragraphs']} 段落, {removed['tables']} 表格")
        if removed_other:
            print(f"   ℹ️ 另外刪除 {removed_other} 個其他元素")

    return doc

# Exercise the delete function
# 1. Preview (dry-run) mode: nothing is removed
print("\n" + "="*60)
print("測試 1: 預覽刪除包含 'PCBA' 的章節")
delete_sections_by_heading(doc, "PCBA", dry_run=True)

# 2. Delete for real and save the result
print("\n" + "="*60)
print("測試 2: 實際刪除並保存")
doc_copy = Document(r'D:\Temp\test_document.docx')  # reload from disk
delete_sections_by_heading(doc_copy, ["PCBA", "環境"], dry_run=False)
doc_copy.save(r'D:\Temp\test_document_modified.docx')
print("\n✅ 已保存修改後的文檔：test_document_modified.docx")

# 3. Verify the result by listing the sections left in the saved file
print("\n" + "="*60)
print("驗證刪除結果：")
doc_modified = Document(r'D:\Temp\test_document_modified.docx')
remaining_sections = find_heading_boundaries(doc_modified)
print(f"\n剩餘章節數：{len(remaining_sections)}")
for s in remaining_sections:
    print(f"   ✓ {s['heading_text']}")

刪除表格的方式:

# Option 1: remove the element through its parent
table_element.getparent().remove(table_element)

# Option 2: remove it from the body
body.remove(table_element)

輸出結果:

test_document_modified.docx:

📚 完整的測試腳本

將所有功能整合在一起：

# End-to-end test flow
def run_complete_test():
    """Run the whole flow: build a document, analyze it, delete sections.

    Writes ``complete_test.docx`` and ``test_result_1.docx`` ..
    ``test_result_3.docx`` using relative paths, and prints progress to
    stdout. Relies on ``find_heading_boundaries`` and
    ``delete_sections_by_heading`` defined elsewhere in this file.
    """
    print("🚀 開始完整測試流程\n")

    # 1. Create the test document
    print("步驟 1: 創建測試文檔")
    from docx import Document
    doc = Document()
    doc.add_heading('測試文檔', 0)

    # Sections to add: (heading text, paragraph texts, whether to add a table)
    sections_data = [
        ("1. 產品概述", ["這是產品介紹", "包含基本信息"], True),
        ("2. PCBA測試規範", ["測試流程說明", "測試標準定義"], True),
        ("3. 軟體測試", ["軟體功能測試", "性能測試"], False),
        ("4. PCBA維修指南", ["維修流程", "注意事項"], True),
        ("5. 品質保證", ["品質標準", "檢驗流程"], False),
    ]

    for title, paragraphs, add_table in sections_data:
        doc.add_heading(title, 1)
        for p in paragraphs:
            doc.add_paragraph(p)
        if add_table:
            table = doc.add_table(rows=2, cols=2)
            table.style = 'Light List Accent 1'
            table.rows[0].cells[0].text = '項目'
            table.rows[0].cells[1].text = '說明'

    doc.save('complete_test.docx')
    print("   ✅ 文檔已創建\n")

    # 2. Analyze the document structure
    print("步驟 2: 分析文檔結構")
    doc = Document('complete_test.docx')
    sections = find_heading_boundaries(doc)
    print(f"   找到 {len(sections)} 個章節\n")

    # 3. Try several deletion scenarios, each on a freshly loaded copy
    print("步驟 3: 測試各種刪除場景\n")

    # Scenario 1: delete every section whose heading contains a keyword
    print("   場景 1: 刪除所有包含 'PCBA' 的章節")
    doc1 = Document('complete_test.docx')
    delete_sections_by_heading(doc1, "PCBA", dry_run=False)
    doc1.save('test_result_1.docx')
    print("   ✅ 已保存到 test_result_1.docx\n")

    # Scenario 2: delete by exact heading match
    print("   場景 2: 精確匹配刪除 '3. 軟體測試'")
    doc2 = Document('complete_test.docx')
    delete_sections_by_heading(doc2, "3. 軟體測試", exact=True, dry_run=False)
    doc2.save('test_result_2.docx')
    print("   ✅ 已保存到 test_result_2.docx\n")

    # Scenario 3: keep the heading, delete only the content below it
    print("   場景 3: 刪除 '品質保證' 但保留標題")
    doc3 = Document('complete_test.docx')
    delete_sections_by_heading(doc3, "品質保證", keep_heading=True, dry_run=False)
    doc3.save('test_result_3.docx')
    print("   ✅ 已保存到 test_result_3.docx\n")

    print("🎉 測試完成！請查看生成的文檔文件。")

# Run the test
run_complete_test()

輸出:

test_result_1.docx

🎓 總結與最佳實踐

關鍵概念

段落索引 vs Body索引：段落索引只計算段落，Body索引包含所有元素
表格不佔用段落索引：這是最常見的錯誤來源
倒序刪除：避免索引變化影響

使用建議

總是使用 dry_run=True 先預覽
保存原始文檔的備份
使用視覺化函數理解文檔結構

進階應用

可以擴展支持 Heading 2, Heading 3 等多層級
可以添加更多過濾條件（如日期、作者等）
可以整合到自動化文檔處理流程中

這個完整的解決方案解決了技術債問題，提供了準確、可靠的章節刪除功能！

簡化版的 find_heading_boundaries:

# %%
def find_heading_boundaries(doc, target_text="", style_name=HEADING1,
                            exact=False, 
                            case_sensitive=False) -> list[tuple[int, int, str]]:
    """
    Return the boundaries of the ``style_name`` sections matching a text.

    Returns:
        list of ``(start_idx, end_idx, heading_text)`` where
        start_idx is the index of the matching heading among the children
            of ``doc.element.body`` (tables are counted too);
        end_idx is the body index of the next heading with the same
            ``style_name``, or ``len(body)`` for the last one;
        heading_text is the heading's unmodified text.

    Matching:
        exact=True  -> the texts must be equal after stripping
        exact=False -> ``target_text`` must be a substring
        case_sensitive=True  -> compared as-is
        case_sensitive=False -> both sides lower-cased first

    Before comparing, carriage returns are dropped from the heading text
    and line breaks become spaces; ``target_text`` is only stripped.

    With ``target_text=""`` and ``exact=False`` every heading of the style
    is returned, because the empty string is a substring of any string.
    Using the empty string to mean "all" is not obvious, so callers should
    be aware of it.
    """
    from docx.text.paragraph import Paragraph

    def fold(text):
        return text if case_sensitive else text.lower()

    needle = fold(target_text.strip())
    body = doc.element.body

    # Pass 1: (body index, raw text) of every heading with the wanted style
    found = []
    for position, child in enumerate(body):
        if type(child).__name__ != "CT_P":
            continue
        paragraph = Paragraph(child, doc)
        if paragraph.style.name == style_name:
            found.append((position, paragraph.text))

    if not found:
        return []

    # Each heading's section ends where the next heading starts; the last
    # one ends at the end of the body.
    starts = [position for position, _ in found]
    ends = starts[1:] + [len(body)]

    # Pass 2: keep only the headings whose text matches
    result = []
    for (start, title), end in zip(found, ends):
        # A heading may contain a line break (e.g.
        # '9\tFixture and capacity plan\nTest Capacity Plan(TBD)'); that is
        # really a template problem, but flattening it here lets the
        # substring match (exact=False) still work.
        candidate = fold(title.strip().replace("\r", "").replace("\n", " "))
        matched = (candidate == needle) if exact else (needle in candidate)
        if matched:
            result.append((start, end, title))
    return result

更簡化版的 find_heading_boundaries:

def find_heading_boundaries(doc, target_text="", style_name=HEADING1,
                            exact=False, case_sensitive=False):
    """
    Compact variant: return ``(start, end, text)`` for each matching heading.

    ``start`` is the body index of a paragraph styled ``style_name`` whose
    stripped text equals (``exact=True``) or contains (``exact=False``) the
    stripped ``target_text``; both sides are lower-cased unless
    ``case_sensitive``. ``end`` is the body index of the next paragraph
    with that style, or ``len(body)`` if there is none. ``text`` is the
    heading's unmodified text.
    """
    from docx.text.paragraph import Paragraph

    def prepare(raw):
        stripped = raw.strip()
        return stripped if case_sensitive else stripped.lower()

    body = doc.element.body
    needle = prepare(target_text)

    heading_positions = []  # body index of every heading with the style
    hits = []               # (body index, raw text) of the matching ones

    for position, node in enumerate(body):
        if type(node).__name__ != "CT_P":
            continue
        paragraph = Paragraph(node, doc)
        if paragraph.style.name != style_name:
            continue
        heading_positions.append(position)
        raw_text = paragraph.text
        candidate = prepare(raw_text)
        matched = (candidate == needle) if exact else (needle in candidate)
        if matched:
            hits.append((position, raw_text))

    # Work out where each matching section ends
    boundaries = []
    for start, text in hits:
        end = len(body)
        for position in heading_positions:
            if position > start:
                end = position
                break
        boundaries.append((start, end, text))
    return boundaries

改良版:

def find_heading_boundaries(doc, target_text="", style_name=None,
                            exact=False, case_sensitive=False):
    """
    Return ``(start_idx, end_idx, heading_text)`` for each matching heading.

    Args:
        doc: python-docx document.
        target_text: Text to search for; "" matches every tracked heading
            when ``exact`` is False.
        style_name: None tracks every style whose name starts with
            "Heading"; a string tracks that one style; a list tracks
            several styles.
        exact: Equality after stripping (True) or substring match (False).
        case_sensitive: If False both sides are lower-cased first.

    Returns:
        list of tuples. ``start_idx`` is the heading's index among the
        children of ``doc.element.body``. ``end_idx`` is the index of the
        next tracked heading at the same or a higher level (for styles
        named "Heading N", a number <= N), or ``len(body)`` if there is
        none. A tracked style without a numeric level always ends the
        section, and a section opened by such a style ends at the next
        tracked heading of any kind.

    Note:
        Previously a section ended at the next tracked heading of any
        level, so with several levels tracked a "Heading 1" chapter was cut
        off at its first "Heading 2". Results are unchanged when only one
        style is tracked.
    """
    from docx.text.paragraph import Paragraph

    def heading_level(name):
        # 'Heading 2' -> 2; any other style name -> None
        prefix, _, suffix = name.partition(" ")
        if prefix == "Heading" and suffix.isascii() and suffix.isdigit():
            return int(suffix)
        return None

    body = doc.element.body
    tgt = target_text.strip() if case_sensitive else target_text.strip().lower()

    # Normalize style_name: a single string becomes a one-item list
    if isinstance(style_name, str):
        style_name = [style_name]

    matches = []       # (body index, level, raw text) of matching headings
    all_headings = []  # (body index, level) of every tracked heading

    for i, elem in enumerate(body):
        if type(elem).__name__ != "CT_P":
            continue
        p = Paragraph(elem, doc)
        style = p.style.name

        is_target_style = (
            style.startswith("Heading") if style_name is None
            else style in style_name
        )
        if not is_target_style:
            continue

        level = heading_level(style)
        all_headings.append((i, level))

        text = p.text
        comp = text.strip() if case_sensitive else text.strip().lower()
        if (comp == tgt) if exact else (tgt in comp):
            matches.append((i, level, text))

    total = len(body)
    results = []
    for start, level, text in matches:
        # A deeper-level heading (sub-section) belongs to the section and
        # does not end it.
        end = next(
            (idx for idx, other in all_headings
             if idx > start
             and (level is None or other is None or other <= level)),
            total,
        )
        results.append((start, end, text))
    return results

改良版(typing / 註解完整):

from typing import List, Tuple, Union, Optional
from docx import Document

def find_heading_boundaries(
    doc: Document, 
    target_text: str = "", 
    style_name: Union[str, List[str], None] = None,
    exact: bool = False, 
    case_sensitive: bool = False
) -> List[Tuple[int, int, str]]:
    """
    Return the section boundaries of the headings that match a text.

    Searches the document for headings matching ``target_text`` and returns
    the body-index range of each one. Useful for locating, extracting or
    deleting specific chapters.

    Args:
        doc: python-docx Document object.
        target_text: Text to search for.
            - "" (default): every heading of the tracked styles matches
              (when ``exact`` is False).
            - non-empty: compared according to ``exact``.
        style_name: Heading styles to track.
            - None (default): every style whose name starts with "Heading".
            - str: one style name, e.g. "Heading 1".
            - List[str]: several style names, e.g. ["Heading 1", "Heading 2"].
        exact: Whether the match must be exact.
            - False (default): ``target_text`` only has to be a substring.
            - True: texts must be equal (after stripping whitespace).
        case_sensitive: Whether letter case matters.
            - False (default): both sides are lower-cased before comparing.
            - True: case must match.

    Returns:
        List[Tuple[int, int, str]]: one tuple per matching heading:
            - [0] start_idx: index of the heading among the children of
              ``doc.element.body`` (tables are counted too).
            - [1] end_idx: index of the next tracked heading at the same or
              a higher level, or ``len(body)`` if there is none.
            - [2] heading_text: the heading's unmodified text.

    Examples:
        >>> # All Heading 1 headings
        >>> sections = find_heading_boundaries(doc, style_name="Heading 1")

        >>> # Every heading containing "Test Plan"
        >>> test_sections = find_heading_boundaries(doc, "Test Plan")

        >>> # One specific heading, matched exactly
        >>> exact_section = find_heading_boundaries(
        ...     doc, "9\\tL10 Test Plan",
        ...     style_name="Heading 1",
        ...     exact=True
        ... )

        >>> # Heading 1 and 2 sections containing "test" (case-insensitive)
        >>> multi_level = find_heading_boundaries(
        ...     doc, "test",
        ...     style_name=["Heading 1", "Heading 2"],
        ...     case_sensitive=False
        ... )

    Note:
        - Heading text may contain special characters such as \\t (tab) or
          \\n (line break).
        - The level of a style named "Heading N" is N. A sub-section
          heading (larger N) does not end the section of a matched heading,
          so ``body[start_idx:end_idx]`` covers the complete section.
          Previously any tracked heading ended the section, which cut a
          "Heading 1" chapter at its first "Heading 2" whenever several
          levels were tracked. Results are unchanged when only one style
          is tracked.
        - A tracked style without a numeric level always ends the section,
          and a section opened by such a style ends at the next tracked
          heading of any kind.
        - For the last section ``end_idx`` is ``len(body)``; the body of a
          Word document normally ends with a ``w:sectPr`` element, so that
          range may include it.
    """
    from docx.text.paragraph import Paragraph

    def heading_level(name: str) -> Optional[int]:
        # 'Heading 2' -> 2; any other style name -> None
        prefix, _, suffix = name.partition(" ")
        if prefix == "Heading" and suffix.isascii() and suffix.isdigit():
            return int(suffix)
        return None

    # Body element of the document
    body = doc.element.body

    # Prepare the search text
    tgt = target_text.strip()
    if not case_sensitive:
        tgt = tgt.lower()

    # Normalize style_name: a single string becomes a one-item list
    if isinstance(style_name, str):
        style_name = [style_name]

    # (body index, level, raw text) of the headings that match the text
    matches: List[Tuple[int, Optional[int], str]] = []
    # (body index, level) of every tracked heading, used to find section ends
    all_headings: List[Tuple[int, Optional[int]]] = []

    for i, elem in enumerate(body):
        # Only paragraph elements (class CT_P) can be headings
        if type(elem).__name__ != "CT_P":
            continue

        p = Paragraph(elem, doc)
        style = p.style.name

        is_target_style = (
            style.startswith("Heading") if style_name is None  # None = all headings
            else style in style_name
        )
        if not is_target_style:
            continue

        level = heading_level(style)
        all_headings.append((i, level))

        # Text used for comparison; the original text is what gets returned
        heading_text = p.text
        comp = heading_text.strip()
        if not case_sensitive:
            comp = comp.lower()

        is_match = (comp == tgt) if exact else (tgt in comp)
        if is_match:
            matches.append((i, level, heading_text))

    # Work out where each matching section ends
    total_elements = len(body)
    results: List[Tuple[int, int, str]] = []

    for start_idx, level, heading_text in matches:
        # First later heading that is not a sub-section of this one;
        # if there is none, the section runs to the end of the body.
        end_idx = next(
            (idx for idx, other in all_headings
             if idx > start_idx
             and (level is None or other is None or other <= level)),
            total_elements,
        )
        results.append((start_idx, end_idx, heading_text))

    return results

enumerate(doc.element.body)
圖片不是頂層元素

body 的直接子元素：
├── w:p (段落) ✅ 列舉
├── w:tbl (表格) ✅ 列舉
└── w:sectPr (節屬性) ❌ 略過

圖片位於：
└── w:p → w:r → w:drawing ← 圖片藏在段落內部！

圖片會跟著段落一起被複製

Word 文件中圖片的位置

1. 段落中的圖片

Body
└── Paragraph (段落)
    └── Run (文字片段)
        └── Drawing (圖片/圖形) ← 圖片在這裡！
            └── Inline or Anchor

2. 表格儲存格中的圖片

Body
└── Table (表格)
    └── Row (列)
        └── Cell (儲存格) ← Cell 是 Row 的子元素
            └── Paragraph (段落) ← Paragraph 是 Cell 的直接子元素
                └── Run
                    └── Drawing (圖片)

更精確的表示
方式 1：完整階層

Table
  ├── Row[0] (第一列)
  │   ├── Cell[0] (第一個儲存格)
  │   │   └── Paragraph[0]
  │   │       └── Run[0]
  │   │           └── Drawing (圖片)
  │   ├── Cell[1] (第二個儲存格)
  │   │   └── Paragraph[0]
  │   │       └── Run[0]
  │   └── Cell[2]
  │       └── Paragraph[0]
  ├── Row[1] (第二列)
  │   └── ...
  └── ...

方式 2：存取路徑

# 正確的存取順序
table                      # 表格
  .rows[0]                # Row 物件
    .cells[0]             # Cell 物件（Cell 是 Row 的屬性）
      .paragraphs[0]      # Paragraph 物件（Paragraph 是 Cell 的子元素）
        .runs[0]          # Run 物件
          ._element       # XML 元素
            .xpath('.//w:drawing')  # 圖片元素

XML 結構驗證

<w:tbl>                    <!-- Table -->
  <w:tr>                   <!-- Row -->
    <w:tc>                 <!-- Cell (table cell) -->
      <w:p>                <!-- Paragraph ← Cell 的直接子元素！ -->
        <w:r>              <!-- Run -->
          <w:drawing>      <!-- 圖片 -->
            ...
          </w:drawing>
        </w:r>
      </w:p>
    </w:tc>
    <w:tc>                 <!-- 另一個 Cell -->
      <w:p>...</w:p>
    </w:tc>
  </w:tr>
</w:tbl>

實際的 XML 結構

段落中的圖片

<w:p>  <!-- 段落 -->
  <w:r>  <!-- Run -->
    <w:t>這是文字</w:t>
  </w:r>
  <w:r>  <!-- 另一個 Run，包含圖片 -->
    <w:drawing>  <!-- 圖片！ -->
      <wp:inline>
        <a:graphic>
          <a:graphicData>
            <pic:pic>...</pic:pic>
          </a:graphicData>
        </a:graphic>
      </wp:inline>
    </w:drawing>
  </w:r>
  <w:r>  <!-- 後續文字 -->
    <w:t>更多文字</w:t>
  </w:r>
</w:p>