Fix issue 983 PDF UnicodeDecodeError

Decode PDF info lines as UTF-8, replacing undecodable bytes, to prevent a
UnicodeDecodeError when a non-ASCII character is encountered.
This commit is contained in:
ayleph 2015-12-04 02:02:02 -05:00 committed by Christopher Allan Webber
parent fd07dd6da9
commit e2b44bd7a7

View File

@@ -207,7 +207,7 @@ def pdf_info(original):
_log.debug('pdfinfo could not read the pdf file.') _log.debug('pdfinfo could not read the pdf file.')
raise BadMediaFail() raise BadMediaFail()
lines = [l.decode() for l in lines] lines = [l.decode('utf-8', 'replace') for l in lines]
info_dict = dict([[part.strip() for part in l.strip().split(':', 1)] info_dict = dict([[part.strip() for part in l.strip().split(':', 1)]
for l in lines if ':' in l]) for l in lines if ':' in l])