From 410ad07a68926cbf7f53820bc58f00f2b10d25ca Mon Sep 17 00:00:00 2001 From: vladimirhasko Date: Mon, 20 Mar 2023 11:29:12 +0000 Subject: [PATCH] improving the conversion based on A.G. feedback --- otc_doc_convertor/convertor.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/otc_doc_convertor/convertor.py b/otc_doc_convertor/convertor.py index 72ac7a497..4c6d3761a 100644 --- a/otc_doc_convertor/convertor.py +++ b/otc_doc_convertor/convertor.py @@ -439,17 +439,17 @@ class OTCDocConvertor: # Special case for multiple asterisks and colons like ecs:*:* re_escape = re.compile(r"([:])(\*+)") + re_escape_new = re.compile(r"([:])(\*)[^$]") for p in soup.body.find_all(string=re_escape): if p.string and (p.parent.name == "p" or p.parent.name == "li"): - p.string.replace_with( - re.sub(re_escape, r"\1``\2``", p.string)) + string=p.string + while re.search(re_escape, string): + if re.search(re_escape_new, string): + string=re.sub(re_escape, r"\1``\2``", string, count=1) + else: + p.string.replace_with(string) + break - # Additional loop fixing false positives from previous loop - re_escape = re.compile(r":``\*``$") - for p in soup.body.find_all(string=re_escape): - if p.string and (p.parent.name == "p" or p.parent.name == "li"): - p.string.replace_with( - re.sub(re_escape, r":*", p.string)) # Drop parent link at the bottom of the page for parent in soup.body.find_all("p", class_="familylinks"):