Fix extract_approx_int not working for non-approx ints, make extract_int more robust

For example, "354 subscribers" wasn't being extracted correctly by extract_approx_int.
Make extract_approx_int and extract_int only extract integers that are whole words (i.e. delimited by word boundaries).
So, e.g., 342 will not be extracted from internetuser342.
This commit is contained in:
James Taylor 2019-12-24 13:07:12 -08:00
parent a428d47bde
commit 3200d66d88

View File

@ -135,7 +135,7 @@ def extract_int(string, default=None):
string = extract_str(string)
if not string:
return default
match = re.search(r'(\d+)', string.replace(',', ''))
match = re.search(r'\b(\d+)\b', string.replace(',', ''))
if match is None:
return default
try:
@ -149,7 +149,7 @@ def extract_approx_int(string):
string = extract_str(string)
if not string:
return None
match = re.search(r'(\d+(?:\.\d+)?[KMBTkmbt])', string.replace(',', ''))
match = re.search(r'\b(\d+(?:\.\d+)?[KMBTkmbt]?)\b', string.replace(',', ''))
if match is None:
return None
return match.group(1)