# Source code for quantlaw.de_extract.stemming
import re
def stem_law_name(name):
    """
    Stem the name of a law so that laws can be recognized in the code.

    Surrounding whitespace is stripped, then the inflection endings ``er``,
    ``en``, ``es``, ``s`` and ``e`` (matched case-insensitively) are removed
    where they sit at the end of a word. The stemmed text is finally passed
    through ``clean_name`` and that result is returned.
    """
    # The lookbehind rejects a match that starts at a word boundary, so an
    # ending is only removed when another word character precedes it; the
    # lookahead requires the ending to be followed by a word boundary.
    ending_pattern = r"(?<!\b)(er|en|es|s|e)(?=\b)"
    trimmed = name.strip()
    stemmed = re.sub(ending_pattern, "", trimmed, flags=re.IGNORECASE)
    return clean_name(stemmed)
def clean_name(name: str) -> str:
    """
    Bring the name into a standard format.

    Every run of whitespace is collapsed into a single space (leading and
    trailing runs are collapsed too, not removed), the text is lowercased,
    and the characters specific to the German language are transliterated:
    ``ß`` -> ``ss``, ``ä`` -> ``ae``, ``ü`` -> ``ue``, ``ö`` -> ``oe``.
    """
    collapsed = re.sub(r"\s+", " ", name)
    # Lowercase BEFORE transliterating: the capital sharp s (U+1E9E)
    # lowercases to "ß", so replacing "ß" first would let it slip through
    # untransliterated. The upper-case umlauts are covered the same way.
    lowered = collapsed.lower()
    # One pass over the string instead of four chained replace() calls.
    transliteration = str.maketrans(
        {
            "ß": "ss",
            "ä": "ae",
            "ü": "ue",
            "ö": "oe",
        }
    )
    return lowered.translate(transliteration)