Fix extract_approx_int not working for non-approx ints, make extract_int more robust
For example, "354 subscribers" wasn't being extracted correctly by extract_approx_int. Make extract_approx_int and extract_int only extract integers that are standalone words, so e.g. 342 will not be extracted from internetuser342.
This commit is contained in:
parent
a428d47bde
commit
3200d66d88
@ -135,7 +135,7 @@ def extract_int(string, default=None):
|
||||
string = extract_str(string)
|
||||
if not string:
|
||||
return default
|
||||
match = re.search(r'(\d+)', string.replace(',', ''))
|
||||
match = re.search(r'\b(\d+)\b', string.replace(',', ''))
|
||||
if match is None:
|
||||
return default
|
||||
try:
|
||||
@ -149,7 +149,7 @@ def extract_approx_int(string):
|
||||
string = extract_str(string)
|
||||
if not string:
|
||||
return None
|
||||
match = re.search(r'(\d+(?:\.\d+)?[KMBTkmbt])', string.replace(',', ''))
|
||||
match = re.search(r'\b(\d+(?:\.\d+)?[KMBTkmbt]?)\b', string.replace(',', ''))
|
||||
if match is None:
|
||||
return None
|
||||
return match.group(1)
|
||||
|
Loading…
x
Reference in New Issue
Block a user