Python-docx 命名空間完全指南：理解 XML Namespace 與 qn() 函數; from docx.oxml.ns import qn #qualified name ; qn('w:p') ; qn('w:tbl')

示例用的test_document.docx
#內容中有兩個表格,其他多數是段落

{namespace}tagname:

在使用 python-docx 處理 Word 文檔時，你可能會遇到這樣的標籤：

{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p

這篇文章將帶你徹底理解這些看似複雜的命名空間。

📚 目錄

什麼是 XML 命名空間？
為什麼 Word 需要命名空間？
qn() 函數：你的好幫手
實戰應用
最佳實踐

1. 什麼是 XML 命名空間？

想像你在一個國際會議上，有三個叫「David」的人：

David from USA
David from Japan
David from France

XML 命名空間就像是加上國籍，避免混淆：

<!-- 沒有命名空間：會混淆 -->
<document>
    <p>這是段落還是價格(price)？</p>
    <table>這是表格還是桌子？</table>
</document>

<!-- 有命名空間：清楚明確 -->
<document>
    <word:p>這是 Word 段落</word:p>
    <html:p>這是 HTML 段落</html:p>
    <word:table>這是 Word 表格</word:table>
    <furniture:table>這是傢俱目錄的桌子</furniture:table>
</document>

2. 為什麼 Word 需要命名空間？

Word 文檔的複雜性

Word 文檔包含多種類型的內容，每種都有自己的命名空間：

# Namespaces commonly found in a Word document, keyed by their usual prefix
NAMESPACES = {
    # Main document content (paragraphs, tables, etc.)
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    
    # Picture drawing
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
    
    # Relationships (hyperlinks, image references, etc.)
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    
    # Graphics
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
}

實際的 Word XML 結構

<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
    <w:body>
        <w:p>
            <w:r>
                <w:t>Hello World</w:t>
            </w:r>
        </w:p>
        <w:tbl>
            <w:tr>
                <w:tc>
                    <w:p>
                        <w:r>
                            <w:t>表格內容</w:t>
                        </w:r>
                    </w:p>
                </w:tc>
            </w:tr>
        </w:tbl>
    </w:body>
</w:document>

3. qn() 函數：你的好幫手

qn() 是 “qualified name” 的縮寫，它能自動將簡短的標籤名轉換為完整的命名空間格式。

基本用法

from docx.oxml.ns import qn

# qn() expands the short prefix into the full namespace automatically
print(qn('w:p'))
# Output: {http://schemas.openxmlformats.org/wordprocessingml/2006/main}p

print(qn('w:tbl'))
# Output: {http://schemas.openxmlformats.org/wordprocessingml/2006/main}tbl

輸出:

為什麼要用 qn()？

# ❌ Hard-coded: long and error-prone
if elem.tag == '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p':
    print("是段落")

# ✅ Using qn(): concise and clear
if elem.tag == qn('w:p'):
    print("是段落")

# ✅ Using endswith(): simple and practical
# (only the '}p' suffix is compared, so the namespace part is not checked)
if elem.tag.endswith('}p'):
    print("是段落")

其他方式,
使用 type(elem).__name__

# NOTE: `doc` is not defined in this snippet; it is assumed to be an
# already-opened Document.
body = doc.element.body

# Branch on the Python class name of each child of the body
for elem in body:
    if type(elem).__name__=="CT_P":
        print("這是段落")
    elif type(elem).__name__=="CT_Tbl":
        print("這是表格")
    elif type(elem).__name__=="CT_SectPr":
        print("這是節設定")

輸出:

4. 實戰應用

範例 1：遍歷文檔元素

from docx import Document
from docx.oxml.ns import qn
from docx.text.paragraph import Paragraph
from docx.table import Table

def analyze_document(filename):
    """Count the direct children of a document body by kind.

    Opens ``filename`` with ``Document``, walks the children of
    ``doc.element.body`` and prints a one-line summary for every paragraph,
    table and section-properties element it meets.

    Args:
        filename: Passed straight to ``Document``.

    Returns:
        A dict with the keys 'paragraphs', 'tables', 'sections' and
        'others', each mapped to the number of matching children.
    """
    doc = Document(filename)

    # Resolve the qualified tag names once; they do not change per element.
    paragraph_tag = qn('w:p')
    table_tag = qn('w:tbl')
    section_tag = qn('w:sectPr')

    stats = dict.fromkeys(('paragraphs', 'tables', 'sections', 'others'), 0)

    for child in doc.element.body:
        tag = child.tag

        if tag == paragraph_tag:
            stats['paragraphs'] += 1
            # Only the first 50 characters of the text are shown.
            print(f"段落: {Paragraph(child, doc).text[:50]}...")
        elif tag == table_tag:
            stats['tables'] += 1
            table = Table(child, doc)
            print(f"表格: {len(table.rows)} 行 x {len(table.columns)} 列")
        elif tag == section_tag:
            stats['sections'] += 1
            print("節設定")
        else:
            # Anything that is not a paragraph, table or sectPr.
            stats['others'] += 1

    return stats

輸出:

範例 2：創建元素判斷函數

def create_element_checker():
    """Build and return a function that names an element's type.

    The qualified tag names are computed once, when this factory runs, so
    the returned checker only has to compare strings.

    Returns:
        A callable that takes an element and returns 'paragraph', 'table',
        'run', 'text', 'break' or 'section' according to its tag, or
        'unknown' when the tag matches none of them.
    """
    known_tags = {
        'paragraph': qn('w:p'),
        'table': qn('w:tbl'),
        'run': qn('w:r'),
        'text': qn('w:t'),
        'break': qn('w:br'),
        'section': qn('w:sectPr'),
    }

    def check_element_type(elem):
        """Return the name registered for ``elem.tag``, or 'unknown'."""
        matches = (
            label
            for label, qualified in known_tags.items()
            if elem.tag == qualified
        )
        # The first match wins; the default covers unregistered tags.
        return next(matches, 'unknown')

    return check_element_type

# Usage: build the checker once, then classify every child of the body.
# NOTE: `doc` is not defined in this snippet; it is assumed to be an
# already-opened Document.
checker = create_element_checker()
for elem in doc.element.body:
    elem_type = checker(elem)
    print(f"元素類型: {elem_type}")

輸出:

範例 3：簡化的方法

def simple_doc_iterator(doc):
    """Yield ``(kind, content)`` pairs for the children of the document body.

    The kind is decided purely from how each child's tag ends:

    * ``'}p'``      -> ``('paragraph', Paragraph(child, doc))``
    * ``'}tbl'``    -> ``('table', Table(child, doc))``
    * ``'}sectPr'`` -> ``('section', child)`` (the raw element)

    Children whose tag ends in none of these are skipped.
    """
    for child in doc.element.body:
        tag = child.tag

        if tag.endswith('}p'):
            yield ('paragraph', Paragraph(child, doc))
        elif tag.endswith('}tbl'):
            yield ('table', Table(child, doc))
        elif tag.endswith('}sectPr'):
            # No wrapper is built for section properties.
            yield ('section', child)

# Usage example
# NOTE: `filename` is not defined in this snippet; it is assumed to be the
# path of the .docx file to open.
doc = Document(filename)
for elem_type, content in simple_doc_iterator(doc):
    if elem_type == 'paragraph':
        print(f"段落: {content.text[:30]}")
    elif elem_type == 'table':
        print(f"表格: {len(content.rows)}x{len(content.columns)}")

輸出:

5. 最佳實踐

選擇合適的方法

# 1. Full namespace match (strictest)
if elem.tag == qn('w:p'):
    pass

# 2. Suffix match (a balance between safety and brevity)
if elem.tag.endswith('}p'):
    pass

# 3. Extract the local tag name (for more complex cases)
tag_name = elem.tag.split('}')[-1] if '}' in elem.tag else elem.tag
if tag_name == 'p':
    pass
    
# 4. Use type(elem).__name__
if type(elem).__name__=="CT_P":
    print("這是段落")

性能考量

# %%
from docx.oxml.ns import qn
import time

# Micro-benchmark of the four paragraph checks
def performance_test(elem, iterations=100000):
    """Time four ways of testing whether ``elem`` is a ``w:p`` paragraph.

    Each check is run ``iterations`` times in a plain loop and the elapsed
    time is printed. The four checks are:

    1. ``elem.tag == qn('w:p')`` with ``qn()`` called on every iteration
    2. ``elem.tag == <cached qn('w:p')>``
    3. ``elem.tag.endswith('}p')``
    4. ``type(elem).__name__ == 'CT_P'``

    Durations are measured with ``time.perf_counter()``. ``time.time()`` is
    a wall-clock timer that can be adjusted while the test runs and may
    have coarse resolution, which makes it a poor fit for intervals this
    short.

    Args:
        elem: The element to test; only ``elem.tag`` and ``type(elem)`` are
            read.
        iterations: Number of repetitions per method.

    Returns:
        None. The timings are printed.
    """
    timer = time.perf_counter

    # Method 1: call qn() on every iteration
    start = timer()
    for _ in range(iterations):
        if elem.tag == qn('w:p'):
            pass
    print(f"qn() 每次調用: {timer() - start:.4f} 秒")

    # Method 2: cache the qn() result outside the timed loop
    p_tag = qn('w:p')
    start = timer()
    for _ in range(iterations):
        if elem.tag == p_tag:
            pass
    print(f"預存 qn() 結果: {timer() - start:.4f} 秒")

    # Method 3: endswith()
    start = timer()
    for _ in range(iterations):
        if elem.tag.endswith('}p'):
            pass
    print(f"endswith() 方法: {timer() - start:.4f} 秒")

    # Method 4: type().__name__
    start = timer()
    for _ in range(iterations):
        if type(elem).__name__ == 'CT_P':
            pass
    print(f"type().__name__ 方法: {timer() - start:.4f} 秒")

輸出1(預存qn最快):

輸出2(type().__name__最快):

各方法優缺點比較

方法比較（更新版）：

type(elem).__name__ == 'CT_P'
✅ 通常最快，或與預存 qn() 相當（直接比較類名；兩者差距很小，不同次執行的排名可能互換）
✅ 最簡單直觀，一看就懂
✅ 不需要額外導入
✅ 類名穩定，極少改變（對應 XML 標準）
⚠️ 需要知道內部類名（但很好記）
elem.tag == 預存的 qn('w:p')
✅ 速度快（預存結果）
✅ XML 標準正確
❌ 需要額外的初始化代碼
❌ 需要導入 qn
elem.tag.endswith('}p')
✅ 簡單，不需要額外導入
✅ 不需要記住類名
❌ 比 type() 慢約 3 倍
⚠️ 理論上可能匹配到其他命名空間的 p（實務上很少發生）
elem.tag == qn('w:p')（每次調用）
✅ 最符合 XML 標準
❌ 最慢（慢約 7-8 倍）
❌ 需要導入 qn
❌ 每次調用都有開銷

實際應用建議

# Practical recommendation
class ElementChecker:
    """Three alternative ways to test whether an element is a paragraph."""

    # Qualified tag names, resolved once when the class body executes
    P_TAG = qn('w:p')
    TBL_TAG = qn('w:tbl')

    @staticmethod
    def is_paragraph_safe(elem):
        """Strict variant: compare the whole qualified tag with ``P_TAG``."""
        tag = elem.tag
        return tag == ElementChecker.P_TAG

    @staticmethod
    def is_paragraph_fast(elem):
        """Fast variant: compare the element's class name with 'CT_P'."""
        class_name = type(elem).__name__
        return class_name == 'CT_P'

    @staticmethod
    def is_paragraph_simple(elem):
        """Simple variant: check only that the tag ends with '}p'."""
        tag = elem.tag
        return tag.endswith('}p')

為什麼 type() 最好

# 1. 性能數據說話
"""
type().__name__: 0.0034 秒 ⚡ 最快
預存 qn():       0.004 秒 ⚡ 幾乎一樣快
endswith():      0.0103 秒 🐌 慢 3 倍  
qn() 每次:       0.0249 秒 🐌 慢 7 倍
"""

# 2. 實用性說話
# - 不需要 import
# - 不需要初始化
# - 代碼最清晰
# - 類名穩定可靠

# 3. Class-name lookup table (for quick reference)
ELEMENT_TYPES = {
    "CT_P": "段落",
    "CT_Tbl": "表格", 
    "CT_Tr": "表格行",
    "CT_Tc": "表格格子",
    "CT_R": "Run",
    "CT_Text": "文字",
    "CT_SectPr": "節設定",
}

建議的工具函數

class WordElementHelper:
    """Helper for classifying Word XML elements by tag or by class name."""

    # Qualified ``w:`` tag names keyed by local name, resolved once when the
    # class body executes
    TAGS = {
        local: qn(f'w:{local}')
        for local in ('p', 'tbl', 'tr', 'tc', 'r', 't', 'sectPr', 'pPr', 'rPr')
    }

    # Element class name -> readable type name
    TYPE_NAMES = {
        'CT_P': 'paragraph',
        'CT_Tbl': 'table',
        'CT_Tr': 'table_row',
        'CT_Tc': 'table_cell',
        'CT_R': 'run',
        'CT_Text': 'text',
        'CT_SectPr': 'section_properties',
    }

    @classmethod
    def is_paragraph(cls, elem):
        """Return True when ``elem.tag`` equals the qualified ``w:p`` tag."""
        paragraph_tag = cls.TAGS['p']
        return elem.tag == paragraph_tag

    @classmethod
    def is_table(cls, elem):
        """Return True when ``elem.tag`` equals the qualified ``w:tbl`` tag."""
        table_tag = cls.TAGS['tbl']
        return elem.tag == table_tag

    @classmethod
    def get_type(cls, elem):
        """Return the ``TAGS`` key whose tag equals ``elem.tag``, else None."""
        matches = (
            local
            for local, qualified in cls.TAGS.items()
            if elem.tag == qualified
        )
        return next(matches, None)

    @classmethod
    def get_type_by_class(cls, elem):
        """Map the element's class name through ``TYPE_NAMES``.

        Returns 'unknown' when the class name is not listed.
        """
        return cls.TYPE_NAMES.get(type(elem).__name__, 'unknown')

# Usage
# NOTE: `doc` is not defined in this snippet; it is assumed to be an
# already-opened Document.
helper = WordElementHelper()
for elem in doc.element.body:
    # Method 1: compare tags
    if helper.is_paragraph(elem):
        print("找到段落（標籤比對）")
    
    # Method 2: compare class names
    elem_type = helper.get_type_by_class(elem)
    print(f"元素類型：{elem_type}")