From 1ad8f751eb1d97dae2e76ba3f5ed18bf2e9f7470 Mon Sep 17 00:00:00 2001
From: AI Assistant
Date: Wed, 21 Jan 2026 20:30:56 +0000
Subject: [PATCH] feat: native support for tracked changes (w:ins in
 paragraph.text, w:delText in run.text)

Register `w:delText` as a `CT_Text` element class, declare it as a
ZeroOrMore child of `CT_R`, and add it to the XPath expressions `CT_R`
uses to assemble run text, so `w:delText` content contributes to
`run.text`.

Add `w:ins/w:r` and `w:moveTo/w:r` to the XPath expressions behind
`CT_P.inner_content_elements` and `CT_P.text`, so runs wrapped in an
insertion or move-destination element contribute to paragraph text.
Runs nested in `w:del` are not selected by those expressions, so
deleted text stays out of `paragraph.text`.

The new tests cover the three paragraph-level cases (insertion,
deletion, move destination).
---
 src/docx/oxml/__init__.py         |  1 +
 src/docx/oxml/text/paragraph.py   |  4 +--
 src/docx/oxml/text/run.py         |  4 ++-
 tests/test_tracked_changes_fix.py | 54 +++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_tracked_changes_fix.py

diff --git a/src/docx/oxml/__init__.py b/src/docx/oxml/__init__.py
index 37f608cef..e7b7ea300 100644
--- a/src/docx/oxml/__init__.py
+++ b/src/docx/oxml/__init__.py
@@ -76,6 +76,7 @@
 register_element_cls("w:ptab", CT_PTab)
 register_element_cls("w:r", CT_R)
 register_element_cls("w:t", CT_Text)
+register_element_cls("w:delText", CT_Text)
 
 # ---------------------------------------------------------------------------
 # header/footer-related mappings
diff --git a/src/docx/oxml/text/paragraph.py b/src/docx/oxml/text/paragraph.py
index 63e96f312..e2203d20b 100644
--- a/src/docx/oxml/text/paragraph.py
+++ b/src/docx/oxml/text/paragraph.py
@@ -57,7 +57,7 @@ def clear_content(self):
     @property
     def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
         """Run and hyperlink children of the `w:p` element, in document order."""
-        return self.xpath("./w:r | ./w:hyperlink")
+        return self.xpath("./w:r | ./w:hyperlink | ./w:ins/w:r | ./w:moveTo/w:r")
 
     @property
     def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
@@ -99,7 +99,7 @@ def text(self): # pyright: ignore[reportIncompatibleMethodOverride]
         Inner-content child elements like `w:r` and `w:hyperlink` are translated to
         their text equivalent.
         """
-        return "".join(e.text for e in self.xpath("w:r | w:hyperlink"))
+        return "".join(e.text for e in self.xpath("w:r | w:hyperlink | w:ins/w:r | w:moveTo/w:r"))
 
     def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
         self.insert(0, pPr)
diff --git a/src/docx/oxml/text/run.py b/src/docx/oxml/text/run.py
index 7496aa616..d556097d6 100644
--- a/src/docx/oxml/text/run.py
+++ b/src/docx/oxml/text/run.py
@@ -35,6 +35,7 @@ class CT_R(BaseOxmlElement):
     cr = ZeroOrMore("w:cr")
     drawing = ZeroOrMore("w:drawing")
     t = ZeroOrMore("w:t")
+    delText = ZeroOrMore("w:delText")
     tab = ZeroOrMore("w:tab")
 
     def add_t(self, text: str) -> CT_Text:
@@ -75,6 +76,7 @@ def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
                 " | w:noBreakHyphen"
                 " | w:ptab"
                 " | w:t"
+                " | w:delText"
                 " | w:tab"
             ):
                 if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
@@ -134,7 +136,7 @@ def text(self) -> str:
         equivalent.
         """
         return "".join(
-            str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
+            str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:delText | w:tab")
         )
 
     @text.setter
diff --git a/tests/test_tracked_changes_fix.py b/tests/test_tracked_changes_fix.py
new file mode 100644
index 000000000..ac9cd3ec4
--- /dev/null
+++ b/tests/test_tracked_changes_fix.py
@@ -0,0 +1,54 @@
+from docx.oxml import parse_xml
+from docx.text.paragraph import Paragraph
+from docx.text.run import Run
+
+class DescribeTrackedChanges:
+    def it_includes_insertions_in_paragraph_text(self):
+        """
+        paragraph.text includes text within <w:ins> tags.
+        """
+        xml = (
+            '<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
+            '  <w:r><w:t>Start </w:t></w:r>'
+            '  <w:ins w:id="1" w:author="Author">'
+            '    <w:r><w:t>Inserted</w:t></w:r>'
+            '  </w:ins>'
+            '  <w:r><w:t> End</w:t></w:r>'
+            '</w:p>'
+        )
+        p = Paragraph(parse_xml(xml), None)
+        # Expected: "Start Inserted End"
+        # Before Fix: "Start  End"
+        assert p.text == "Start Inserted End"
+
+    def it_excludes_deletions_in_paragraph_text(self):
+        """
+        paragraph.text still excludes text within <w:del> tags (standard behavior).
+        """
+        xml = (
+            '<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
+            '  <w:r><w:t>Start </w:t></w:r>'
+            '  <w:del w:id="1" w:author="Author">'
+            '    <w:r><w:delText>Deleted</w:delText></w:r>'
+            '  </w:del>'
+            '  <w:r><w:t>End</w:t></w:r>'
+            '</w:p>'
+        )
+        p = Paragraph(parse_xml(xml), None)
+        assert p.text == "Start End"
+
+    def it_includes_moved_text_destination(self):
+        """
+        paragraph.text includes text within <w:moveTo> tags (treated as accepted/visible).
+        """
+        xml = (
+            '<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
+            '  <w:r><w:t>Start </w:t></w:r>'
+            '  <w:moveTo w:id="1" w:author="Author">'
+            '    <w:r><w:t>Moved Text</w:t></w:r>'
+            '  </w:moveTo>'
+            '  <w:r><w:t> End</w:t></w:r>'
+            '</w:p>'
+        )
+        p = Paragraph(parse_xml(xml), None)
+        assert p.text == "Start Moved Text End"