From 2c9359c16c3ecaa767e89ba2b715c166e6851a2f Mon Sep 17 00:00:00 2001 From: gtema Date: Wed, 26 Oct 2022 16:25:27 +0000 Subject: [PATCH] Fix CES urn:smn:.... Reviewed-by: Goncharov, Artem Co-authored-by: gtema Co-committed-by: gtema --- otc_doc_convertor/convertor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/otc_doc_convertor/convertor.py b/otc_doc_convertor/convertor.py index 177283a7..c58ac075 100644 --- a/otc_doc_convertor/convertor.py +++ b/otc_doc_convertor/convertor.py @@ -331,6 +331,8 @@ class OTCDocConvertor: r"^([^a-zA-Z0-9\s]{10,})$", # OBS special chars - "\$" "\\" etc r"^(\\[\$\\bfnrtvu]{1})$", + # CES contains: urn:smn:([a-z]|[A-Z]|[0-9]|\\-){1,32}:.... + r"\s(urn:smn:\(.*)\.", ] for to_rawize in rawize_strings: for p in soup.body.find_all(string=re.compile(to_rawize)):