Source code for isbnlib._core
# -*- coding: utf-8 -*-
# isbnlib - tools for extracting, cleaning and transforming ISBNs
# Copyright (C) 2014-2023 Alexandre Lima Conde
# SPDX-License-Identifier: LGPL-3.0-or-later
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""isbnlib main file for ISBN manipulation.
Tools for extracting, cleaning, transforming and validating ISBN ids.
"""
import logging
import re
LOGGER = logging.getLogger(__name__)
RE_ISBN10 = re.compile(r'ISBN\x20(?=.{13}$)\d{1,5}([- ])\d{1,7}'
r'\1\d{1,6}\1(\d|X)$|[- 0-9X]{10,16}')
RE_ISBN13 = re.compile(r'97[89]{1}(?:-?\d){10,16}|97[89]{1}[- 0-9]{10,16}')
RE_STRICT = re.compile(
r'^(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|'
r'(?=(?:[0-9]+[- ]){3})'
r'[- 0-9X]{13}$|97[89][0-9]{10}$|'
r'(?=(?:[0-9]+[- ]){4})'
r'[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}'
r'[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]$',
re.I | re.M | re.S,
)
RE_NORMAL = re.compile(
r'97[89]{1}-?[0-9]{10}|'
r'97[89]{1}-[-0-9]{13}|'
r'\d{9}[0-9X]{1}|'
r'[-0-9X]{10,16}',
re.I | re.M | re.S,
)
RE_LOOSE = re.compile(r'[- 0-9X]{10,19}', re.I | re.M | re.S)
ISBN13_PREFIX = '978' # just the prefix with isbn-10
LEGAL = '0123456789xXisbnISBN- '
def check_digit10(firstninedigits):
"""Check sum ISBN-10."""
# minimum checks
if len(firstninedigits) != 9:
return ''
try:
int(firstninedigits)
except ValueError: # pragma: no cover
return ''
# checksum
val = sum(
(i + 2) * int(x) for i, x in enumerate(reversed(firstninedigits)))
remainder = int(val % 11)
if remainder == 0:
tenthdigit = 0
else:
tenthdigit = 11 - remainder
if tenthdigit == 10:
tenthdigit = 'X'
return str(tenthdigit)
def check_digit13(firsttwelvedigits):
"""Check sum ISBN-13."""
# minimum checks
if len(firsttwelvedigits) != 12:
return ''
try:
int(firsttwelvedigits)
except ValueError: # pragma: no cover
return ''
# checksum
val = sum(
(i % 2 * 2 + 1) * int(x) for i, x in enumerate(firsttwelvedigits))
thirteenthdigit = 10 - int(val % 10)
if thirteenthdigit == 10:
thirteenthdigit = '0'
return str(thirteenthdigit)
def _check_structure10(isbn10like):
"""Check structure of an ISBN-10."""
return bool(re.match(RE_ISBN10, isbn10like))
def _check_structure13(isbn13like):
"""Check structure of an ISBN-13."""
return bool(re.match(RE_ISBN13, isbn13like))
[docs]
def is_isbn10(isbn10):
"""Validate as ISBN-10."""
isbn10 = canonical(isbn10)
if len(isbn10) != 10:
return False # pragma: no cover
return bool(not check_digit10(isbn10[:-1]) != isbn10[-1])
[docs]
def is_isbn13(isbn13):
"""Validate as ISBN-13."""
isbn13 = canonical(isbn13)
if len(isbn13) != 13:
return False # pragma: no cover
if isbn13[0:3] not in ('978', '979'):
return False
return bool(not check_digit13(isbn13[:-1]) != isbn13[-1])
def to_isbn10(isbn13):
"""Transform isbn-13 to isbn-10."""
isbn13 = canonical(isbn13)
# Check prefix
if isbn13[:3] != ISBN13_PREFIX:
return isbn13 if len(isbn13) == 10 and is_isbn10(isbn13) else ''
if not is_isbn13(isbn13):
return ''
isbn10 = isbn13[3:]
check = check_digit10(isbn10[:-1])
# Change check digit
return isbn10[:-1] + check if check else ''
def to_isbn13(isbn10):
"""Transform isbn-10 to isbn-13."""
isbn10 = canonical(isbn10)
if len(isbn10) == 13 and is_isbn13(isbn10):
return isbn10
if not is_isbn10(isbn10):
return ''
isbn13 = ISBN13_PREFIX + isbn10[:-1]
check = check_digit13(isbn13)
return isbn13 + check if check else ''
def canonical(isbnlike):
"""Keep only numbers and X."""
numb = [c for c in isbnlike if c in '0123456789Xx']
if numb and numb[-1] == 'x':
numb[-1] = 'X'
isbn = ''.join(numb)
# Filter some special cases
if (isbn and len(isbn) not in (10, 13)
or isbn in ('0000000000', '0000000000000', '000000000X')
or isbn.find('X') not in (9, -1) or isbn.find('x') != -1):
return ''
return isbn
def clean(isbnlike):
"""Clean ISBN (only legal characters)."""
cisbn = [c for c in isbnlike if c in LEGAL]
buf = re.sub(r'\s*-\s*', '-', ''.join(cisbn))
return re.sub(r'\s+', ' ', buf).strip()
def notisbn(isbnlike, level='strict'):
"""Check with the goal to invalidate isbn-like.
level:
'strict' when certain they are not ISBNs (default)
'loose' only filters obvious NO ISBNs
"""
if level not in ('strict', 'loose'): # pragma: no cover
LOGGER.error('level as no option %s', level)
return None
isbnlike = canonical(isbnlike)
if len(isbnlike) not in (10, 13):
return True
if level != 'strict':
return False
if len(isbnlike) == 10:
return not is_isbn10(isbnlike)
return not is_isbn13(isbnlike)
def get_isbnlike(text, level='normal'):
"""Extract all substrings that seem like ISBNs.
level:
strict almost as certain they are ISBNs
normal (default)
loose catch many as possible
"""
if level == 'normal': # pragma: no cover
isbnlike = RE_NORMAL
elif level == 'strict':
isbnlike = RE_STRICT
elif level == 'loose':
isbnlike = RE_LOOSE
else:
LOGGER.error('level as no option %s', level)
return []
return isbnlike.findall(text)
def get_canonical_isbn(isbnlike, output='bouth'):
"""Extract ISBNs and transform them to the canonical form.
output:
isbn10
isbn13
bouth (default)
"""
if output not in ('bouth', 'isbn10', 'isbn13'): # pragma: no cover
LOGGER.error('output as no option %s', output)
return ''
regex = RE_NORMAL
match = regex.search(isbnlike)
if match:
# Get only canonical characters
cisbn = canonical(match.group())
if not cisbn:
return ''
# Split into a list
chars = list(cisbn)
# Remove the last digit from `chars` and assign it to `last`
last = chars.pop()
buf = ''.join(chars)
if len(chars) == 9:
# Compute the ISBN-10 checksum digit
check = check_digit10(buf)
else:
# Compute the ISBN-13 checksum digit
check = check_digit13(buf)
# If checksum OK return a `canonical` ISBN
if str(check) == last:
if output == 'bouth':
return cisbn
if output == 'isbn10':
return cisbn if len(cisbn) == 10 else to_isbn10(cisbn)
return to_isbn13(cisbn) if len(cisbn) == 10 else cisbn
return ''
def ean13(isbnlike):
"""Transform an `isbnlike` string in an EAN number (canonical ISBN-13)."""
ib = canonical(isbnlike)
if len(ib) == 13:
return ib if is_isbn13(ib) else ''
if len(ib) == 10:
return to_isbn13(ib) if is_isbn10(ib) else ''
return ''
# Alias
EAN13 = ean13
GTIN13 = ean13