#!/usr/bin/env python3
# -*- coding: utf-8 -*- vim60:fdm=marker
#
# Copyright: 2016, Maximiliano Curia <maxy@debian.org>
#
# License: ISC
#  Permission to use, copy, modify, and/or distribute this software for any
#  purpose with or without fee is hereby granted, provided that the above
#  copyright notice and this permission notice appear in all copies.
#  .
#  THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
#  REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
#  AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
#  INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
#  LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
#  OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
#  PERFORMANCE OF THIS SOFTWARE.

''' Generic functions used by the parsers. '''

import re

from collections import namedtuple

# Local modules
from .datatypes import CopyrightHolder, ReSub

# Internal record types.  Only the field names are fixed here:
#   ReText:    (re, text)
#   ReLicense: (re, license, get_detail)
ReText = namedtuple('ReText', ['re', 'text'])
ReLicense = namedtuple('ReLicense', ['re', 'license', 'get_detail'])


# Ordered substitutions applied by clean_comments() to strip comment markup.
# Every entry carries a compiled pattern (.re) and its replacement (.repl);
# the order matters because each rule works on the output of the previous
# one.
COMMENTS_SUBS = (
    # Runs of whitespace other than \n (line breaks are still needed by the
    # ^ / $ anchored rules below)
    ReSub(re.compile(r'[ \t\r\f\v]{2,}'), r' '),
    # Fortran: "c" / "C" comment marker at the start of a line
    ReSub(re.compile(r'^ ?[cC]\s', re.MULTILINE), r' '),
    # C / C++: "/*", "*/" and "//"
    ReSub(re.compile(r'\*/|/[/*]'), r' '),
    # Lisp: ";;" at the start or at the end of a line
    ReSub(re.compile(r'(^\s?;;|;;$)', re.MULTILINE), r' '),
    # (HT|X)ML: "<!--" at the start and "-->" at the end of a line
    ReSub(re.compile(r'(^\s?<!--|-->\s?$)', re.MULTILINE), r' '),
    # m4: "dnl" at the start of a line
    ReSub(re.compile(r'^dnl\s', re.MULTILINE), r' '),
    # Generic: leading run of blanks / non-word characters followed by a blank
    ReSub(re.compile(r'^[\s\W]+\s', re.MULTILINE), r' '),
    # Non text: anything but blanks, word characters and . , @ ; ( ) / + -
    ReSub(re.compile(r'[^ \w.,@;()/+-]+'), r' '),
    # Remaining runs of whitespace, newlines included
    ReSub(re.compile(r'\s{2,}'), r' '),
)


def clean_comments(text):
    ''' Return text with comment markup removed and whitespace squeezed.

    Every rule of COMMENTS_SUBS is applied once, in the order listed, each
    one working on the result of the previous one.
    '''
    cleaned = text
    for rule in COMMENTS_SUBS:
        pattern, replacement = rule.re, rule.repl
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned


# Lines matching this pattern are rejected by parse_copyright() before any
# copyright indicator is looked for: license boilerplate, code that uses
# "(c)" as an expression, templates and other known false positives.
# The pattern is compiled with re.VERBOSE, so unescaped whitespace and the
# "# ..." notes inside the string are ignored by the regex engine; literal
# blanks are therefore written as "\s" or "\ ".
COPYRIGHT_PRE_IGNORE_REGEX = r'''
    (?:^[#]\s*define\s+.*\([^)]*c[^)]*\)     # #define foo(c) -- not copyright
      |^(?:msgstr|msgid)\s                   # po files data
      |We\sprotect\syour\srights\swith\s
       (?:two\ssteps:
         |a\stwo-step\smethod:)\s\(1\)\s(?:we\s)?copyright
      |and\sput\sthe\sfollowing\scopyright
      |connection\swith\sthe\scopyright
      |applicable\scopyright\slaw\sincludes
      |Original\scopyright\sfollows                     # mostly gpl stuff
      |compilation\sand\sits\sresulting\scopyright
      |under\sapplicable\scopyright\slaw                # clean up
      |copyright\son\sthe\sProgram,\sand
      |copyright\son\sthe\ssoftware,\sand
      |can\sgive\sappropriate\scopyright\spermission
      |requiring\scopyright\spermission
      |copyright\streaty\sadopted\son
      |copyright\sby\stheir\scontributors
      |the\ same\ copyright\ terms
      |for\ copyright\ years
      |Added\ copyright\ notes
      |adjust\ copyright
      |use\ copyright\ ranges
      |disclaims\ copyright                             # public domain
      |the\ applicable\ copyright                       # creative commons
      |under\ copyright\ law\ or
      |free\ from\ copyright\ or
      |WIPO\ Copyright\ Treaty
      |requirements\ and\ penalties\ of\ the\ U\.S\.\ Copyright   # Academic
      |Grant\ of\ Copyright\ License\.                            # Academic
      |prohibited\ by\ U\.S\.\ copyright\ law                     # Academic
      |copyright\ doctrines\ of\ fair\ use                        # MPL-2.0
      |group_by\ copyright            # found in decopy
      |copyright\ file                # found in decopy
      |copyright\ attribution         # found in decopy
      |copyright\ format              # found in decopy
      |copyright\ helper              # found in decopy
      |copyright\ detection           # found in decopy
      |Copyright\sflag                # found in krita code
      |{\scopyright\s=\sc;\s}         # found in plan code
      |Quick\scopyright\s:\sgimp_quick_copyright # krita gmic
      # found in muffin/nemo/cinnamon-c-c
      |\bcopyright\s(?:info|headers?|lines?|messages?|reasons?|symbols?)\b
      |\bUpdate\scopyright\b          # found in muffin
      |LTEXT.*Copyright.*IDC_STATIC   # found in breakpad
      |copyright\ shortname           # found in a changelog
      |copyright\ es\ ilegal          # found in a translation
      |copyright\ existente           # found in a translation
      |copyright\ existent            # found in a translation
      |copyright\ on\ that\ code      # found in kdepimlibs
      |copyright\ on\ this\ code      # found in boringssl
      |for\ copyright\ in             # found in telepathy-logger-qt
      |Fix\ copyright                 # changelog entry
      |concern\ over\ copyright       # found in marble
      |copyright\ &copy;\ @copyright@ # found in marble
      |Copyright\ with\ his\ permission     # found in boringssl
      |optional\ string\ copyright\ =       # found in dom_distiller_js
      |copyright\ &\ original\ defaults     # found in ffmpeg
      |--copyright                          # found in ffmpeg
      |skip\ copyright                      # found in ffmpeg
      |U_COPYRIGHT_STRING              # found in icu
      |LATIN\ CAPITAL\ LETTER\ C       # found in icu
      |copyright\ at\ the\ end         # found in icu, thanks, I guess
      |copyright\ generation           # found in icu
      |print\ our\ copyright           # found in icu
      |copyright\ into\ the            # found in icu
      |Copyright\ (?:Exists|chunk|length|offset|signature|it\.)   # found in ffmpeg
      |\bif\b.*\(\s?c\s?\)             # damn you (c)
      |\bwhile\b.*\(\s?c\s?\)          # damn you (c)
      |Similar\sto\s\(\s?c\s?\)        # damn you (c)
      |\(\s?c\s?\)\sit\smakes\suse     # damn you (c)
      |replaced\s\(C\)                 # damn you (c)
      |\(c\)\ have                     # damn you (c)
      |\(c\)\ the\ sum                 # damn you (c)
      |\(unsigned\ int\)               # damn you (c)
      |\(\*?a\),\ ?\(b\),\ ?\(c\)      # damn you (c)
      |\(\w\).*[^\s,;]\s*\(\w\)        # damn you (c)
      |\(c\).*\(c2\)                   # damn you (c)
      |ISUPPER\ ?\(c\)                 # damn you (c)
      |\(c\)\ You\ must\ retain        # damn you (c)
      |\(c\)\ ?[\*/&|<>\+-]?=          # damn you (c)
      |\(c\)\ ?\)                      # damn you (c)
      |sizeof\([^)]+\)                 # damn you (c)
      |\(c\)(?:->|\.)[^ ]              # damn you (c)
      |=\s*\(c\)\s*\*.*;               # damn you (c) var = (c) * ... ;
      |\(c\)\s*[*/+-]\s*\(             # damn you (c) (c) * (...
      |\(c\)\ and\ its\ data           # damn you (c)
      |character\ \(c\)                # damn you (c)
      |\(c\)\ differs\ for             # damn you (c)
      |combining\ mark\ \(c\)          # damn you (c)
      |\(c\)\ *processed                 # damn you (c)
      |\(c\)\ data\ (?:[^ ]+\ ){,5}has\ been\ received  # damn you (c)
      |shown\sby\s\(c\)\ssupports
      |AND\s\(c\)
      |following\scopyright\sand\slicense   # more text
      |\bcopyright\ssign\b
      |"Country\ Code\ \(C\)"         # found in qca
      |\bcopyright\ and\b             # found in cinnamon
      |\bCopyright:?\s(?:\(\s*C\s*\)\s)?YEAR\b          # template used in po
      # found in cinnamon-settings-daemon
      |free\sof\sknown\scopyright\srestrictions
      |[-\s–—]%[ds][-\s–—]
      |<\*\*authorname\*\*>                          # template (marble)
      |%{AUTHOR}                                     # template (kcoreaddons)
      |krazy:exclude(?:all)?=[^\s]*copyright         # tagged to be ignored
      # Test case in ark
      |UNRAR\s+[0-9.]+\ (\(iconv\)|beta\ [0-9])\ freeware\s+Copyright\ \(c\)\ [0-9-]+\ Alexander\ Roshal
    )'''
# Case-insensitive, verbose-mode compiled form used by parse_copyright().
COPYRIGHT_PRE_IGNORE_RE = re.compile(COPYRIGHT_PRE_IGNORE_REGEX,
                                     re.IGNORECASE | re.VERBOSE)

# Recognises the start of a copyright statement.  The indicator must be at
# the beginning of the text or preceded by whitespace.  Group 1 (the only
# capturing group) holds the indicator itself -- the word "copyright",
# "copr.", the copyright sign, "&copy;" or "(c)" -- together with the
# separator that follows it; the trailing lookahead requires non-blank text
# right after.  parse_copyright() uses the pattern both to locate the first
# statement of a line and to split a line holding several statements.
COPYRIGHT_INDICATOR_REGEX = r'''(?:^|\s)
    (
       (?:
          copyright(?:\s*\(c\))?  # The full word
         |copr\.             # Legally-valid abbreviation
         |©                  # Unicode character COPYRIGHT SIGN
         |\xa9               # Unicode copyright sign encoded in iso8859
         |&copy;             # HTML copyright sign
       )(?:[:\s]+)
      |(?:\(\s?c\s?\))(?:[:\s]*)   # Legally-null representation of sign
    )(?=\S)'''
# Case-insensitive, verbose-mode compiled form of the pattern above.
COPYRIGHT_INDICATOR_RE = re.compile(COPYRIGHT_INDICATOR_REGEX,
                                    re.IGNORECASE | re.VERBOSE)

# Searched by parse_copyright() in the text that follows the copyright
# indicator (or in the continuation text).  A match means the line merely
# talks about copyright, is a template or is code, and it is discarded.
# Three alternatives: whole words anywhere in the text, blank-delimited
# tokens, and texts that consist of nothing but the listed item.
COPYRIGHT_POST_IGNORE_REGEX = r'''
    \b(?:
        information             # Discussing copyright information
       |(notice|statement|claim|string)s? # Discussing the notice
       |\<?name\ of\ author\>?  # GPL template
       |YEAR\s+YOUR\s+NAME      # template
       # |and|or used in: and/or its subsidiary(-ies).
       # |in used in .in tld
       |\ in\                   # Part of a sentence
       |is|to|if                # Part of a sentence
       |(holder|owner)s?        # Part of a sentence
       |ownership               # Part of a sentence
       |IDC_STATIC              # template
    )\b
    |(?:^|\s)(?:
        &&|\|\|                 # (c) conditional
       |\(\s?[abd-f]\s?\)       # ugly one letter expressions
    )(?:\s|$)
    |^\s*(?:
        law[.:]?                # GPL text
       |{                       # if (c) {
       |L?GPL$                  # Copyright: License
       |@\w+\{\w+\}             # template
       )\s*$'''
# Case-insensitive, verbose-mode compiled form of the pattern above.
COPYRIGHT_POST_IGNORE_RE = re.compile(COPYRIGHT_POST_IGNORE_REGEX,
                                      re.IGNORECASE | re.VERBOSE)

# Substitutions applied, in the order listed, by parse_copyright() to every
# candidate copyright statement in order to drop the boilerplate that
# surrounds the holder information.
CRUFT_SUBS = (
    # "All rights reserved" and variations, with the punctuation after it
    ReSub(re.compile(r'''(?:(?:some|all)? rights reserved'''
                     r'''|(?:some|all) rights)[\s,.;\*#'"]*''',
                     re.IGNORECASE), r''),
    ReSub(re.compile(r'It can be distributed', re.IGNORECASE), r''),
    # The two phrases below are anchored on word boundaries; without them
    # the case-insensitive match also removes pieces of names, e.g. the
    # "et Al" of "Janet Allen" or the end of "Rand contributors".
    ReSub(re.compile(r'\band contributors\b', re.IGNORECASE), r''),
    ReSub(re.compile(r'\bet al\b', re.IGNORECASE), r''),
    # Dangling "and" at the end of the statement
    ReSub(re.compile(r'\band$', re.IGNORECASE), r''),
    # Line continuation backslash
    ReSub(re.compile(r'\\$'), r''),
    # Trailing blanks, punctuation and quotes
    ReSub(re.compile(r'''[\s,.;\*#'"]*$'''), r''),
    # "( c )" written with a whitespace character on each side of the letter
    ReSub(re.compile(r'\(\sc\s\)', re.IGNORECASE), r''),
    # Squeeze whitespace and strip the leading one
    ReSub(re.compile(r'\s{2,}'), r' '),
    ReSub(re.compile(r'^\s+'), r''),
    # Unescape "\@" and replace the HTML en dash entity
    ReSub(re.compile(r'\\@'), r'@'),
    ReSub(re.compile(r'&ndash;'), r'-'),
)


def _parse_copyright_continuation(text, continuation):
    ''' Check whether text continues the statement found by continuation.

    continuation is the match object of the copyright indicator found on an
    earlier line (or a false value).  text counts as a continuation when it
    starts with whatever preceded the indicator on that line and is then
    indented by at least one more whitespace character.

    Returns (rest, continuation), rest being text without the prefix and
    the indentation, or ('', None) when text is not a continuation.
    '''
    not_a_continuation = ('', None)
    if not continuation:
        return not_a_continuation

    lead = continuation.string[:continuation.start(1)]
    # Special case for C style multiline comments: the opening "/*" becomes
    # " *" on the lines that follow.
    accepted = (lead, lead.replace('/*', ' *'))
    if not text.startswith(accepted):
        return not_a_continuation

    tail = text[len(lead):]
    indent = re.match(r'\s+', tail)
    if indent is None:
        return not_a_continuation
    return tail[indent.end():], continuation


def parse_copyright(text, continuation=None):
    ''' Extract the copyright statements held in a single line of text.

    continuation is the match object returned for a previous line; it is
    only used when text has no copyright indicator of its own, to check
    whether text continues that earlier statement.

    Returns (copyrights, match): the list of cleaned up statements and the
    indicator match to hand over as continuation for the next line.
    (None, None) is returned when the line is ignored or holds no
    copyright information.
    '''
    nothing = (None, None)

    if COPYRIGHT_PRE_IGNORE_RE.search(text):
        return nothing

    match = COPYRIGHT_INDICATOR_RE.search(text)
    if match:
        rest = text[match.end():]
    else:
        # Process continuations
        rest, match = _parse_copyright_continuation(text, continuation)
        if not match:
            return nothing
    if COPYRIGHT_POST_IGNORE_RE.search(rest):
        return nothing

    copyrights = []
    # The indicator pattern has one capturing group, so split() interleaves
    # the separators with the text; the text sits at the even positions.
    for chunk in COPYRIGHT_INDICATOR_RE.split(rest)[::2]:
        for rule in CRUFT_SUBS:
            chunk = rule.re.sub(rule.repl, chunk)
        # Anything of two characters or less is discarded
        if len(chunk) > 2:
            copyrights.append(chunk)

    return copyrights, match


def parse_holders(content):
    ''' Return the copyright holders found in content.

    content is processed line by line (split on newlines); the match
    returned by parse_copyright() for one line is passed on to the next
    one so that continuation lines can be recognised.  Statements for
    which CopyrightHolder.from_copyright() returns a false value are
    skipped.
    '''
    found = []
    previous = None

    for line in content.split('\n'):
        statements, previous = parse_copyright(line, previous)
        for statement in statements or ():
            holder = CopyrightHolder.from_copyright(statement)
            if holder:
                found.append(holder)

    return found


# Patterns telling the BSD style licenses apart; the replacement of each
# entry is the license name.  parse_bsd() returns the name of the first
# entry that matches, so the more specific texts have to be listed before
# the generic ones (e.g. BSD-3-clause before BSD-2-clause).
BSD_RES = (
    ReSub(
        re.compile(
            r'4. If you include any Windows specific code \(or a derivative '
            r'thereof\) fromg? the apps directory \(application code\) you '
            r'must include an acknowledgement:? '
            r'"?This product includes software written by Tim Hudson '
            r'\(tjh@cryptsoft.com\)"?', re.IGNORECASE),
        'SSLeay'),
    # Advertising clause
    ReSub(
        re.compile(r'All advertising materials mentioning features or use of '
                   r'this software must display the following '
                   r'acknowledge?ment.*This product includes software '
                   r'developed by', re.IGNORECASE),
        'BSD-4-clause'),
    ReSub(
        re.compile(
            r'You acknowledge that this software is not designed'
            r'(?:, licensed)? '
            r'or intended for use in the design, construction, operation or '
            r'maintenance of any nuclear facility.', re.IGNORECASE),
        'BSD-3-clause-No-Nuclear'),
    # Source redistribution clause directly followed by the endorsement
    # clause
    ReSub(
        re.compile(
            r'Redistributions of source code must retain the above '
            r'copyright notice, this list of conditions and the following '
            r'disclaimer.? '
            r'(?:\* )?'
            r'(?:The name(?:\(?s\)?)? .*? may not|Neither the '
            r'(?:names? .*?|authors?) n?or the names of(?: '
            r'(?:its|their|other|any))? contributors may) be used to '
            r'endorse or promote products derived from this software',
            re.IGNORECASE),
        'BSD-Source-Code'),
    ReSub(
        re.compile(
            r'Redistributions of any form whatsoever must retain the '
            r'following acknowledgment:? \'?This product includes software '
            r'developed by ',
            re.IGNORECASE),
        'BSD-3-clause-Attribution'),
    # Endorsement clause on its own
    ReSub(
        re.compile(r'(?:The name(?:\(?s\)?)? .*? may not|Neither the '
                   r'(?:names? .*?|authors?) n?or the names of(?: '
                   r'(?:its|their|other|any))? contributors may) be used to '
                   r'endorse or promote products derived from this software',
                   re.IGNORECASE),
        'BSD-3-clause'),
    ReSub(
        re.compile(
            r'The views and conclusions contained in the software and '
            r'documentation are those of the authors and should not '
            r'be interpreted as representing official policies, '
            r'either expressed or implied, of the FreeBSD Project.',
            re.IGNORECASE),
        'BSD-2-clause-FreeBSD'),
    ReSub(
        re.compile(
            r'This code is derived from software contributed to The '
            r'NetBSD Foundation by',
            re.IGNORECASE),
        'BSD-2-clause-NetBSD'),
    # Generic fallback: only the source redistribution clause
    ReSub(
        re.compile(r'Redistributions of source code must retain the above '
                   r'copyright notice', re.IGNORECASE),
        'BSD-2-clause'),
    ReSub(
        re.compile(
            r'BSD PROTECTION LICENSE '
            r'TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION, AND '
            r'MODIFICATION', re.IGNORECASE),
        'BSD_Protection_License(non-dfsg)'),
    # Permission grant without the "provided that ..." condition
    ReSub(
        re.compile(
            r'Permission to use, copy, modify, and/?or distribute this '
            r'software for any purpose with or without fee is hereby '
            r'granted. '
            r'THE SOFTWARE IS PROVIDED "?AS IS"? AND THE AUTHOR DISCLAIMS '
            r'ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL '
            r'IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO '
            r'EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, '
            r'INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER '
            r'RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN '
            r'ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, '
            r'ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE '
            r'OF THIS SOFTWARE.', re.IGNORECASE),
        '0BSD'),
)


def parse_bsd(text, match, group):
    ''' Return the name of the BSD variant that text corresponds to.

    The entries of BSD_RES are tried in order and the first one found in
    text decides the result; group is returned when none of them matches.
    The match argument is not used.
    '''
    for rule in BSD_RES:
        found = rule.re.search(text)
        if found:
            return found.expand(rule.repl)
    return group


# Suffixes that parse_cc_by() appends to the base license name.  Every entry
# found in the text contributes its suffix, in the order listed here:
# the optional license elements first, the version number last.
CC_BY_RES = (
    # NonCommercial, optionally preceded by one other element
    ReSub(
        re.compile(r'Creative Commons (?:Legal Code )?Attribution[- ](?:[^ ]+[- ])?'
                   r'NonCommercial', re.IGNORECASE),
        r'-NC'),
    # ShareAlike, optionally preceded by one other element
    ReSub(
        re.compile(r'Creative Commons (?:Legal Code )?Attribution[- ](?:[^ ]+[- ])?'
                   r'ShareAlike', re.IGNORECASE),
        r'-SA'),
    # NoDerivatives, optionally preceded by one other element
    ReSub(
        re.compile(r'Creative Commons (?:Legal Code )?Attribution[- ](?:[^ ]+[- ])?'
                   r'NoDerivatives', re.IGNORECASE),
        r'-ND'),
    # Version number, after up to two other elements
    ReSub(
        re.compile(r'Creative Commons (?:Legal Code )?Attribution[- ](?:[^ ]+[- ]){,2}'
                   r'([\d.]+)', re.IGNORECASE),
        r'-\1'),
)


def parse_cc_by(text, match, group):
    ''' Return group followed by the Creative Commons suffixes found in text.

    Each entry of CC_BY_RES that is found in text adds its expanded
    replacement, in the order of the table.  The match argument is not
    used.
    '''
    searched = ((rule, rule.re.search(text)) for rule in CC_BY_RES)
    suffixes = [found.expand(rule.repl) for rule, found in searched if found]
    if not suffixes:
        return group
    return group + ''.join(suffixes)


# Patterns extracting the version of a GNU license; the replacement builds
# the version string out of the captured numbers ("+" for "or later",
# "KDEeV" for the KDE e.V. clause).  parse_gpl() only uses the first entry
# that matches, so the wordings carrying several versions or an "or later"
# clause are listed before the plain single version ones.
GNU_VERSION_RES = (
    # Two versions, or later ones approved by KDE e.V.
    ReSub(
        re.compile(
            r'version ([\d.]+) (?:of the License,? )?or '
            r'(?:\(? ?at your option ?\)? )?'
            r'version ([\d.]+),? or,? (?:(?:any )?later versions? '
            r'(?:accepted|approved) by the '
            r'membership of KDE ?e\.?V\.?|at the discretion of KDE ?e\.?V\.? '
            r'(?:[^ ]+ ){,20}any later version)', re.IGNORECASE),
        r'\1+\2+KDEeV'),
    # Two explicit versions
    ReSub(
        re.compile(
            r'General Public License version ([\d.]+) (?:and|or),? '
            r'(?:\(? ?at your option ?\)? )?version '
            r'([\d.]+) as published by the Free Software Foundation',
            re.IGNORECASE),
        r'\1+\2'),
    ReSub(
        re.compile(
            r'General Public License as published by the Free Software '
            r'Foundation[,;]? (?:either )?version ([\d.]+) '
            r'(?:of the License,? )?'
            r'(?:and|or),? (?:\(? ?at your option ?\)? )?(?:version )?'
            r'([\d.]+?)[.,;]? ',
            re.IGNORECASE),
        r'\1+\2'),
    # One version, or later ones approved by KDE e.V.
    ReSub(
        re.compile(
            r'version ([\d.]+) of the License,? or,? '
            r'(?:\(? ?at your option ?\)? )?'
            r'(?:any later version accepted by the '
            r'membership of KDE e.V.|at the discretion of KDE ?e\.?V\.? '
            r'(?:[^ ]+ ){,20}any later version)', re.IGNORECASE),
        r'\1+KDEeV'),
    # One version or any later version
    ReSub(
        re.compile(r'either version ([^ ]+?)(?: of the License)?[;,] '
                   r'or[ ,(]+at your option[ ,)]+any later version',
                   re.IGNORECASE), r'\1+'),
    ReSub(
        re.compile(
            r'(?:modify it|used) under the terms of the GNU (?:Affero )?'
            r'(?:Lesser(?:/Library)? |Library(?:/Lesser)? )?General Public '
            r'License (?:as published by the Free Software Foundation[;,]? )?'
            r'(?:either )?version ?([^ ]+?)[;,]? (?:as published by the Free '
            r'Software Foundation[;,]? )?(?:of the Licen[sc]e,? )?or '
            r'(?:later|(?:\(? ?at your option ?\)?,? )?any later version)',
            re.IGNORECASE),
        r'\1+'),
    ReSub(
        re.compile(r'licensed under (?:[^ ]+ ){,10}[AL]?GPL,? ?(?:version|v)'
                   r' ?([\d.]+) (?:(?:or )?(?:higher|later)|or[, (]+at your '
                   r'option[, )]+any later version)',
                   re.IGNORECASE),
        r'\1+'),
    ReSub(
        re.compile(
            r'General Public License,? Version ([\d.]+) or later',
            re.IGNORECASE),
        r'\1+'),
    ReSub(
        re.compile(
            r'under the terms of (?:the )?version ([^ ]+) or later of the '
            r'(?:GNU|[AL]?GPL)', re.IGNORECASE),
        r'\1+'),
    ReSub(
        re.compile(r'GNU (?:Affero )?(?:Lesser(?:/Library)? |Library )?'
                   r'General Public License (?:as )?published by the Free '
                   r'Software Foundation[;,] either version ([^ ]+?)[.,]? '
                   r'(?:of the License )?or '
                   r'(?:\(at your option\) )?any later version',
                   re.IGNORECASE),
        r'\1+'),
    # A single version
    ReSub(
        re.compile(r'licensed under (?:[^ ]+ ){,10}[AL]?GPL ?(?:version|v)'
                   r' ?([\d.]+?)[.,]? ',
                   re.IGNORECASE),
        r'\1'),
    ReSub(
        re.compile(r'(?:used|modify it) under the terms (?:and conditions )?'
                   r'of the GNU '
                   r'(?:Affero )?'
                   r'(?:Lesser(?:/Library)? |Library )?General Public '
                   r'License,? (?:as published by the Free Software '
                   r'Foundation;? )?(?:either )?'
                   r'Version ([\d.]+?)(?:[.,;]|as)? ', re.IGNORECASE), r'\1'),
    ReSub(
        re.compile(r'GNU (?:Affero )?(?:Lesser(?:/Library)? |Library )?'
                   r'General Public License, version ([^ ]+?)[ .,;]',
                   re.IGNORECASE), r'\1'),
    ReSub(
        re.compile(r'version ([\d.]+?)[.,]? (?:\(?only\)?.? )?'
                   r'(?:of the GNU (?:Affero )?'
                   r'(?:Lesser(?:/Library)? |Library )?'
                   r'General Public License )?(?:as )?published by the '
                   r'Free Software Foundation', re.IGNORECASE), r'\1'),
    ReSub(
        re.compile(r'GNU (?:Affero )?(?:Lesser(?:/Library)? |Library )?'
                   r'General Public License (?:as )?published by the Free '
                   r'Software Foundation[;,] version ([^ ]+?)[.,]? ',
                   re.IGNORECASE),
        r'\1'),
    ReSub(
        re.compile(
            r'(?:General Public License|GPL) (?:Vers.?ion )?([\d.]+?)[.,]? ',
            re.IGNORECASE),
        r'\1'),
)

# Known special exceptions granted on top of a GNU license; the replacement
# of each entry is the short name of the exception.  parse_gpl() collects
# the names of all the entries found in the text.
GNU_EXCEPTION_RES = (
    # Font embedding exception
    ReSub(
        re.compile(
            r'As a special exception, if you create a document which uses '
            r'this font, and embed this font or unaltered portions of this '
            r'font into the document, this font does not by itself cause '
            r'the resulting document to be covered by the GNU General '
            r'Public License. This exception does not however invalidate '
            r'any other reasons why the document might be covered by the '
            r'GNU General Public License. If you modify this font, you may '
            r'extend this exception to your version of the font, but you '
            r'are not obligated to do so. If you do not wish to do so, '
            r'delete this exception statement from your version.',
            re.IGNORECASE),
        r'Font'),
    # Permission to link with the OpenSSL library
    ReSub(
        re.compile(
            r'As a special exception, the copyright holders give permission '
            r'to link the code of portions of this program with the OpenSSL '
            r'library', re.IGNORECASE),
        r'OpenSSL'),
    # Autoconf: either the macro wording or the generated configure script
    # wording
    ReSub(
        re.compile(
            r'As a special exception(?:, the respective Autoconf Macro[\' ]?s '
            r'copyright owner gives unlimited permission to copy, distribute '
            r'and modify the configure scripts that are the output of '
            r'Autoconf when processing the Macro\.'
            r'| to the GNU General Public License, if you distribute this '
            r'file as part of a program that contains a configuration '
            r'script generated by Autoconf, you may include it under the '
            r'same distribution terms that you use for the rest of that '
            r'program.)', re.IGNORECASE),
        r'AutoConf'),
    # Bison parser skeleton
    ReSub(
        re.compile(
            r'As a special exception, you may create a larger work that '
            r'contains part or all of the Bison parser skeleton and '
            r'distribute that work under terms of your choice', re.IGNORECASE),
        r'Bison'),
    # Files distributed with programs built using GNU Libtool
    ReSub(
        re.compile(
            r'As a special exception to the (?:[^ ]+ ){,5}if you '
            r'distribute this file as part of a program or library that '
            r'is built using GNU Libtool, you may include this file under '
            r'the same distribution terms that you use for the rest of that '
            r'program', re.IGNORECASE),
        r'LibTool'),
)


def parse_gpl(text, match, group):
    ''' Return the name of the GNU license described by text.

    The result has the form "<group>[-<version>][ with <names> exception]".

    text:  license text to examine.
    match: not used.
    group: base license name the result starts with.

    The version comes from the first entry of GNU_VERSION_RES found in
    text; the names of all the entries of GNU_EXCEPTION_RES found in text
    are sorted and joined with "_".
    '''
    version = ''
    for sub in GNU_VERSION_RES:
        found = sub.re.search(text)
        if found:
            version = found.expand(sub.repl)
            break
    if version:
        # Only drop trailing ".0" components and stray dots ("2.0" -> "2",
        # "2." -> "2").  str.rstrip('0.') strips a *set* of characters and
        # would also eat significant zeros ("10" -> "1", "2.10" -> "2.1").
        version = '-{}'.format(re.sub(r'(?:\.0*)+$', '', version))

    exceptions = []
    for sub in GNU_EXCEPTION_RES:
        found = sub.re.search(text)
        if found:
            exceptions.append(found.expand(sub.repl))
    exception = ''
    if exceptions:
        exception = ' with {} exception'.format('_'.join(sorted(exceptions)))

    return '{}{}{}'.format(group, version, exception)


# Suffixes that parse_gfdl() appends to the base license name, in this
# order, for every entry found in the text.
GFDL_RES = (
    # No invariant sections, front-cover texts or back-cover texts
    ReSub(
        re.compile(
            r'GNU Free Documentation License[,.; ]*(?:(?:Version|v)? [^ ]* )?'
            r'(?:or any later version published by the Free Software '
            r'Foundation; )?'
            r'with no Invariant Sections, no Front-Cover Texts, and no '
            r'Back-Cover Texts', re.IGNORECASE),
        r'-NIV'),
    # Version number
    ReSub(
        re.compile(
            r'GNU Free Documentation License[,.; ]*(?:Version|v)?  ?'
            r'(\d+(?:\.[\d]+)*)[-,.; ]+?', re.IGNORECASE),
        r'-\1'),
)


def parse_gfdl(text, match, group):
    ''' Return group followed by the GFDL suffixes found in text.

    Each entry of GFDL_RES that is found in text adds its expanded
    replacement, in the order of the table.  The match argument is not
    used.
    '''
    name = group
    for rule in GFDL_RES:
        found = rule.re.search(text)
        if found:
            name += found.expand(rule.repl)
    return name


# Suffixes that parse_lppl() appends to the base license name, in this
# order, for every entry found in the text.
LPPL_RES = (
    # Version number
    ReSub(
        re.compile(
            r'LaTeX Project Public License,? (?:either )?(?:version|v) '
            r'?([\d.]+)[; ]', re.IGNORECASE),
        r'-\1'),
    # "or any later version", within six words of the license name
    ReSub(
        re.compile(
            r'LaTeX Project Public License,? (?:[^ ]+ ){,6}or '
            r'(?:\(?at your option\)? )?any later version', re.IGNORECASE),
        r'+'),
)


def parse_lppl(text, match, group):
    ''' Return group followed by the LPPL suffixes found in text.

    Each entry of LPPL_RES that is found in text adds its expanded
    replacement, in the order of the table.  The match argument is not
    used.
    '''
    result = group
    for entry in LPPL_RES:
        hit = entry.re.search(text)
        if hit:
            result += hit.expand(entry.repl)
    return result


# Suffixes that parse_zpl() appends to the base license name: only the
# version number of the Zope Public License.
ZPL_RES = (
    ReSub(
        re.compile(r'the Zope Public License,? (?:Version|v)?[ ]?([\d.]+)',
                   re.IGNORECASE),
        r'-\1'),
)


def parse_zpl(text, match, group):
    ''' Return group followed by the ZPL suffixes found in text.

    Each entry of ZPL_RES that is found in text adds its expanded
    replacement, in the order of the table.  The match argument is not
    used.
    '''
    label = group
    for candidate in ZPL_RES:
        located = candidate.re.search(text)
        if located:
            label += located.expand(candidate.repl)
    return label


# Patterns identifying variants of the MIT style licenses; the replacement
# of each entry is the license name.  parse_mit() returns the name of the
# first entry found in the text.
MIT_RES = (
    ReSub(
        re.compile(
            r'Permission to use, copy, modify, and distribute this software '
            r'and its documentation for any purpose and without fee is '
            r'hereby granted, provided that the above copyright notice '
            r'appear in all copies and that both that the copyright notice '
            r'and this permission notice and warranty disclaimer appear in '
            r'supporting documentation, and that the name of the author not '
            r'be used in advertising or publicity pertaining to distribution '
            r'of the software without specific, written prior permission.',
            re.IGNORECASE),
        'Expat-like-Highscore'),
    # Clause forbidding the use of the holder's name for promotion
    ReSub(
        re.compile(
            r'Except as contained in this notice, the name of a copyright '
            r'holder shall not be used in advertising or otherwise to '
            r'promote the sale, use or other dealings in this Software '
            r'without prior written authorization of the copyright holder.',
            re.IGNORECASE),
        'MIT-like-icu'),
)


def parse_mit(text, match, group):
    ''' Pick the label of a known MIT-style variant found in the text.

    The MIT_RES entries are tried in order; for the first one whose pattern
    occurs in `text`, the expansion of its replacement is returned.

    text: text to search.
    match: not used; accepted so the signature agrees with the other
        three-argument get_detail callables listed in LICENSES_RES.
    group: value returned unchanged when no MIT_RES entry is found.
    '''
    searches = (
        (variant.re.search(text), variant.repl) for variant in MIT_RES)
    # Both generators are lazy, so searching stops at the first hit.
    labels = (hit.expand(label) for hit, label in searches if hit)
    return next(labels, group)


LICENSES_RES = (
    # public-domain
    ReLicense(
        re.compile(
            r'(:?This [^ ]+ is in|is (?:hereby |released )?'
            r'(?:in(?:to)|to|for)) '
            r'the public domain',
            re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(r'This work is free of known copyright restrictions',
                   re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(r'http[ :]/?/?creativecommons.org/publicdomain/mark/',
                   re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(r'Public Domain, originally written by', re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(r'This [^ ]+ is put into the public domain, I claim '
                   r'no copyright on that code', re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(r'[^ ]+ was written by(?: [^ ]+){,4}, and is placed in the '
                   r'public domain. The author hereby disclaims copyright to '
                   r'this source code.', re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(
            r'These files[] ]+are hereby placed in the public domain without '
            r'restrictions', re.IGNORECASE),
        'public-domain', None),
    ReLicense(
        re.compile(
            r'I hereby disclaim the copyright on this code and place it in '
            r'the public domain.', re.IGNORECASE),
        'public-domain', None),
    # Apache
    ReLicense(
        re.compile(r'under the Apache License, Version ([\d.]+)',
                   re.IGNORECASE),
        'Apache', lambda t, m, l: 'Apache-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(r'Licensed under the Apache License v([\d.]+) '
                   r'http[ :]/?/?www\.apache\.org/licenses/LICENSE-(?:[\d.]+)',
                   re.IGNORECASE),
        'Apache', lambda t, m, l: 'Apache-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(
            r'Apache License Version ([\d.]+), \w+ \d+ '
            r'http[ :]/?/?www\.apache\.org/licenses/? '
            r'TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION',
            re.IGNORECASE),
        'Apache', lambda t, m, l: 'Apache-{}'.format(m.group(1))
    ),
    # Artistic
    ReLicense(
        re.compile(
            r'Released under the terms of the Artistic License v?([\d.]+)',
            re.IGNORECASE),
        'Artistic', lambda t, m, l: 'Artistic-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(
            r'This (?:[^ ]+ ){,2}is distributed under the Artistic License '
            r'(?:version )?([\d.]+)',
            re.IGNORECASE),
        'Artistic', lambda t, m, l: 'Artistic-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(r'you (?:can|may) (?:re)?distribute (?:it and/or modify '
                   r'it )?under the terms of.* the Artistic License',
                   re.IGNORECASE),
        'Artistic', None),
    ReLicense(
        re.compile(
            r'The "?Artistic License"? '
            r'Preamble '
            r'The intent of this document is to state the conditions under '
            r'which a Package may be copied, such that the Copyright Holder '
            r'maintains some semblance of artistic control over the '
            r'development of the package, while giving the users of the '
            r'package the right to use and distribute the Package in a '
            r'more[- ]or[- ]less customary fashion, plus the right to make '
            r'reasonable modifications.', re.IGNORECASE),
        'Artistic', None),
    ReLicense(
        re.compile(r'is free software under the Artistic License',
                   re.IGNORECASE),
        'Artistic', None),
    # BSD-2-clause BSD-3-clause BSD-4-clause
    ReLicense(
        re.compile(
            r'licensed under the (?:[^ ]+ ){,3}'
            r'\(?http[ :]/?/?(?:www\.)?opensource\.org/licenses/BSD-([23])-Clause\)?',
            re.IGNORECASE),
        'BSD',
        lambda t, m, l:
            'BSD-{}-clause'.format(m.group(1))
    ),
    ReLicense(
        re.compile(r'THIS SOFTWARE IS PROVIDED .*"?AS IS"? AND ANY EXPRESS OR '
                   r'IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE '
                   r'IMPLIED WARRANTIES OF MERCHANTABILITY', re.IGNORECASE),
        'BSD', parse_bsd),
    ReLicense(
        re.compile(
            r'''THIS SOFTWARE IS PROVIDED (?:``|")?AS IS(?:''|")? AND '''
            r'WITHOUT ANY EXPRESS OR '
            r'IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE '
            r'IMPLIED WARRANTIES OF MERCHANTABILITY', re.IGNORECASE),
        'BSD', parse_bsd),
    ReLicense(
        re.compile(r'licensed under (?:[^ ]+ ){,10}the BSD license',
                   re.IGNORECASE),
        'BSD', parse_bsd),
    ReLicense(
        re.compile(r'Redistribution and use in source and binary forms, with '
                   r'or without modification, are permitted provided that '
                   r'the following conditions are met.{,32}'
                   r'Redistributions of source code must retain the above '
                   r'copyright notice, this list of conditions and the '
                   r'following disclaimer.{,32}'
                   r'Redistributions in binary form must reproduce the above '
                   r'copyright notice, this list of conditions and the '
                   r'following disclaimer in the documentation and/or other '
                   r'materials provided with the distribution.{,32}'
                   r'The name "?Carnegie Mellon University"? must not be used '
                   r'to endorse or promote products derived from this '
                   r'software without prior written permission. For '
                   r'permission or any other legal details, please contact '
                   r'Office of Technology Transfer Carnegie Mellon '
                   r'University.{,256}'
                   r'Redistributions of any form whatsoever must retain the '
                   r'following acknowledgment:? "?This product includes '
                   r'software developed by Computing Services at Carnegie '
                   r'Mellon University \(http[: ]/?/?www.cmu.edu/?computing/?\)."?',
                   re.IGNORECASE),
        'BSD', lambda *a: 'BSD_like_CMU-4-clause'),
    ReLicense(
        re.compile(
            r'Redistribution and use is allowed according to the terms of '
            r'the 2-clause BSD license.', re.IGNORECASE),
        'BSD', lambda *a: 'BSD-2-clause'),
    ReLicense(
        re.compile(
            r'Redistribution and use is allowed according to the terms of the '
            r'(?:new )?BSD license.? ([^ ]+ ){,10}COPYING-CMAKE-SCRIPTS',
            re.IGNORECASE),
        'BSD', lambda *a: 'BSD-3-clause'),
    ReLicense(
        re.compile(
            r'Redistribution and use is allowed according to the terms of the '
            r'BSD license.?', re.IGNORECASE),
        'BSD', None),
    ReLicense(
        re.compile(
            r'Distributed under the OSI-approved BSD License \(the '
            r'"?License[" ]?\)', re.IGNORECASE),
        'BSD', None),
    # TODO: learn about references to named files
    ReLicense(
        re.compile(
            r'Use of this source code is governed by a BSD-style license '
            r'that can be found in the [^ ]+ file\.', re.IGNORECASE),
        'BSD', None),
    # ISC
    ReLicense(
        re.compile(r'Permission to use, copy, modify, and(?:\/or)? distribute '
                   r'this (?:[^ ]+ ){,4}for any purpose with or without fee is '
                   r'hereby granted, provided.*copyright notice.*permission '
                   r'notice.*all copies', re.IGNORECASE),
        'ISC', None),
    # CC-BY, CC-BY-SA, CC-BY-ND, CC-BY-NC, CC-BY-NC-SA, CC-BY-NC-ND
    ReLicense(
        re.compile(r'http[ :]?/?/?creativecommons.org/licenses/by',
                   re.IGNORECASE),
        'CC-BY', parse_cc_by),
    ReLicense(
        re.compile(r'This .*is licensed under .*Creative Commons Attribution'
                   r'.*License', re.IGNORECASE),
        'CC-BY', parse_cc_by),
    ReLicense(
        re.compile(
            r'THE WORK \(?AS DEFINED BELOW\)? IS PROVIDED UNDER THE TERMS '
            r'OF THIS CREATIVE COMMONS PUBLIC LICENSE \(?[" ]?CCPL[" ]? OR '
            r'[" ]?LICENSE[" ]?\)?\. THE WORK IS PROTECTED BY COPYRIGHT '
            r'AND/OR OTHER '
            r'APPLICABLE LAW\. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED '
            r'UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED\.',
            re.IGNORECASE),
        'CC-BY', parse_cc_by),
    ReLicense(
        re.compile(
            r'License:? Creative Commons Attribution', re.IGNORECASE),
        'CC-BY', parse_cc_by),
    # CC0
    ReLicense(
        re.compile(r'http[ :]/?/?creativecommons.org/publicdomain/zero',
                   re.IGNORECASE),
        'CC0', None),
    ReLicense(
        re.compile(r'To the extent possible under law, .*the person who '
                   r'associated CC0 .*with this work has waived all copyright '
                   r'and related or neighboring rights to this work',
                   re.IGNORECASE),
        'CC0', None),
    ReLicense(
        re.compile(r'license:? CC0-1.0 ', re.IGNORECASE),
        'CC0', None
    ),
    # CDDL
    ReLicense(
        re.compile(r'terms of the Common Development and Distribution License'
                   r'(, Version ([^( ]+))? \(the[" ]*License[" ]?\)', re.IGNORECASE),
        'CDDL',
        lambda t, m, l:
            'CDDL{}'.format('-{}'.format(m.group(2)) if m.groups()[1] else '')
    ),
    # CPL
    ReLicense(
        re.compile(r'(?:under the terms of|(?:licensed|released) under the) '
                   r'.*the Common Public License', re.IGNORECASE),
        'CPL-1.0', None),
    # EFL
    ReLicense(
        re.compile(
            r'Permission is hereby granted to use, copy, modify and(?:/or) '
            r'distribute this [^ ]+ provided that:?'
            r'[-,.+* ]*copyright notices are retained unchanged'
            r'[-,.+* ]*any distribution of this [^ ]+ whether modified or '
            r'not, includes this (?:file|license text)[-,.+* \d]*'
            r'Permission is hereby also granted to distribute binary programs '
            r'which depend on this [^ ]+(?:provided that:)?'
            r'[-,.+* ]*if the binary program depends on a modified version of '
            r'this [^ ]+ you (must|are encouraged to) publicly release the '
            r'modified version of this', re.IGNORECASE),
        'EFL',
        lambda t, m, l:
            'EFL-{}'.format('1' if m.group(1).lower() == 'must' else '2')
    ),
    # Expat
    ReLicense(
        re.compile(r'(?:MIT license:?|licensed under the (?:terms of either '
                   r'the)?MIT.*|Some rights reserved:?|License:? (?:name:? )?'
                   r'MIT(?: url:?)) '
                   r'\(?http[ :]?/?/?www\.opensource\.org/licenses/mit-license\.php\)?',
                   re.IGNORECASE),
        'MIT/X11', lambda *a: 'Expat'),
    ReLicense(
        re.compile(r'Permission is hereby granted, free of charge, to any '
                   r'person obtaining a copy of this software and(?:/or)? '
                   r'associated documentation files \(the[" ]*'
                   r'(?:Software|Materials)[" ]?\), to deal in the '
                   r'(?:Software|Materials) without restriction, including '
                   r'without limitation the rights to use, copy, modify, '
                   r'merge, publish, distribute, sublicense, and(?:/or)? '
                   r'sell copies of the (?:Software|Materials?), and to '
                   r'permit persons to whom the (?:Software|Materials?) '
                   r'(?:is|are) furnished to do so, subject to the following '
                   r'conditions:? '
                   r'The above copyright notice and this permission notice '
                   r'shall be included in all copies or substantial portions '
                   r'of the (?:Software|Materials?)\.', re.IGNORECASE),
        'MIT/X11', lambda *a: 'Expat'),
    # GPL
    ReLicense(
        re.compile(
            r'GNU General Public License Usage '
            r'(?:[^ ]+ ){,2}This [^ ]+ may be used under the terms of '
            r'the GNU General '
            r'Public License version 2\.0 or 3\.0 '
            r'.{,512} Qt GPL Exception version 1\.3', re.IGNORECASE),
        'GPL', lambda *a: 'GPL-2+3 with QT-1.3 exception'),
    ReLicense(
        re.compile(
            r'GNU General Public License Usage '
            r'(?:[^ ]+ ){,2}This [^ ]+ may be used under the terms of '
            r'the GNU General '
            r'Public License version ([\d.]+) (or later)?'
            r'.{,512} (Qt GPL Exception version 1\.3)?', re.IGNORECASE),
        'GPL',
        lambda t, m, l:
            'GPL-{}{}{}'.format(m.group(1).rstrip('0.'),
                                '+' if m.groups()[1] else '',
                                ' with QT-1.3 exception' if m.groups()[2] else '')
    ),
    ReLicense(
        re.compile(
            r'(?:(?:is free software.? )?you (?:can|may) (?:re)?distribute '
            r'(?:it|them) and(?:/or)? (?:[^ ]+ ){,10}modify (?:it|them)|is '
            r'(?:distributed|'
            r'licensed)) under the terms of (?:the )?(?:version [^ ]+ '
            r'(?:or later )?'
            r'(?:\(?only\)? )?of )?the (?:GNU )?'
            r'(?:General Public License|GPL)', re.IGNORECASE),
        'GPL', parse_gpl),
    ReLicense(
        re.compile(
            r'(?:(?:is free software.? )?you (?:can|may) (?:re)?distribute '
            r'(?:it|them) and(?:/or)? modify (?:it|them)|is (?:distributed|'
            r'licensed)) under the terms of (?:the )?(?:GNU )?'
            r'(?:General Public License|GPL)', re.IGNORECASE),
        'GPL', parse_gpl),
    ReLicense(re.compile(r'is distributed (?:[^ ]+ ){,16}terms (?:[^ ]+ ){,64}GPL'),
              'GPL', parse_gpl),
    ReLicense(re.compile(r'licensed under the (?:[^ ]+ ){,64}GPL '), 'GPL', parse_gpl),
    ReLicense(
        re.compile(r'may be distributed and/or modified under the '
                   r'terms of the GNU General Public License'),
        'GPL', parse_gpl),
    ReLicense(
        re.compile(r'You can Freely distribute this program under the GNU '
                   r'General Public License', re.IGNORECASE),
        'GPL', parse_gpl),
    ReLicense(
        re.compile(
            r'GNU GENERAL PUBLIC LICENSE '
            r'Version (\d+), (?:\d+ )?\w+ \d+ '
            r'Copyright \(C\) (?:[\d, ]+) Free Software Foundation, Inc. '
            r'(?:<?http[: ]?/?/?fsf.org/>? |'
            r'51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA )?'
            r'Everyone is permitted to copy and distribute verbatim copies '
            r'of this license document, but changing it is not allowed.',
            re.IGNORECASE),
        'GPL',
        lambda t, m, l:
            'GPL-{}'.format(m.group(1).rstrip('0.'))),
    ReLicense(
        re.compile(
            r'may be used under the terms of the GNU General Public License '
            r'version (\d)\.0'),
        'GPL',
        lambda t, m, l:
            'GPL-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(
            r'license:? (?:[^ ]+ ){,2}[("]?'
            r'http[: ]/?/?creativecommons\.org/licenses/GPL/2\.0/?[")]?',
            re.IGNORECASE),
        'GPL', lambda *a: 'GPL-2+'),
    ReLicense(
        re.compile(r'License:? GPLWithACException', re.IGNORECASE),
        'GPL', lambda *a: 'GPL with AutoConf exception'),
    ReLicense(
        re.compile(r'License:? GPL[- v]*([\d.]*)(\+?)', re.IGNORECASE),
        'GPL',
        lambda t, m, l:
            'GPL{}'.format('-{}{}'.format(m.group(1).rstrip('0.'), m.group(2))
                           if m.groups()[0] else '')
    ),
    ReLicense(
        re.compile(r'This [^ ]+ is (?:freely distributable|licensed) under '
                   r'the (?:GNU (?:General )?Public License|GPL)',
                   re.IGNORECASE),
        'GPL', None),
    ReLicense(re.compile(r' GPL2 or later '), 'GPL', lambda *a: 'GPL-2+'),
    ReLicense(re.compile(r' is Free Software, released under the GNU Public '
                         r'License\. '), 'GPL', lambda *a: 'GPL-2+'),
    # LGPL
    ReLicense(
        re.compile(
            r'GNU (?:Lesser|Library) General Public License Usage '
            r'(?:[^ ]+ ){,2}This [^ ]+ may be used under the terms of '
            r'the GNU (?:Lesser|Library) General '
            r'Public License version ([\d.]+) (or later)?'
            r'.{,512} (Qt LGPL Exception version ([\d.]+))?', re.IGNORECASE),
        'LGPL',
        lambda t, m, l:
            'LGPL-{}{}{}'.format(
                m.group(1).rstrip('0.'),
                '+' if m.groups()[1] else '',
                ' with QT-{} exception'.format(m.group(4)) if m.groups()[2] else '')
    ),
    ReLicense(
        re.compile(
            r'GNU Lesser General Public License Usage '
            r'(?:[^ ]+ ){,2}This [^ ]+ may be used under the terms of '
            r'the GNU Lesser General '
            r'Public License version 2\.1 '
            r'.{,512} Qt LGPL Exception version 1\.1', re.IGNORECASE),
        'LGPL', lambda *a: 'LGPL-2.1 with QT-1.1 exception'),
    ReLicense(
        re.compile(
            r'(?:(?:is free software.? )?you (?:can|may) (?:re)?distribute '
            r'(?:it|them) and(?:/or)? modify (?:it|them)|is (?:distributed|'
            r'licensed)) under the terms (?:and conditions )?of (?:the )?'
            r'(?:version [^ ]+ '
            r'(?:\(?only\)? )?of )?the (?:GNU )?'
            r'(?:(?:Library(?:/Lesser)? |Lesser(?:/Library)? )'
            r'(?:GNU )?General Public License|LGPL)', re.IGNORECASE),
        'LGPL', parse_gpl),
    ReLicense(
        re.compile(
            r'(?:(?:is free software.? )?you (?:can|may) (?:re)?distribute '
            r'(?:it|them) and(?:/or)? modify (?:it|them)|is (?:distributed|'
            r'licensed)) under the terms of (?:the )?(?:GNU )?'
            r'(?:(?:Library |Lesser(?:/Library)? )(?:GNU )?General Public '
            r'(?:version(?: [^ ]+){,5} )?License|LGPL)',
            re.IGNORECASE),
        'LGPL', parse_gpl),
    ReLicense(
        re.compile(r'licensed under (?:[^ ]+ ){,10}LGPL '),
        'LGPL', parse_gpl),
    ReLicense(
        re.compile(
            r'may be (?:distributed and/or modified|used) under the terms of '
            r'the GNU Lesser(?:/Library)? '
            r'General Public License', re.IGNORECASE),
        'LGPL', parse_gpl),
    ReLicense(
        re.compile(r'This [^ ]+ is '
                   r'(?:distributed|freely distributable|licensed) under '
                   r'the (?:(?:terms|conditions) of the |license )?(?:GNU )?'
                   r'(?:(?:Lesser|Library) (?:General )?'
                   r'Public License|LGPL)',
                   re.IGNORECASE),
        'LGPL', parse_gpl),
    ReLicense(
        re.compile(
            r'this (?:[^ ]+ ){,2}may be used under the terms of '
            r'(?:[^ ]+ ){,20}the GNU (?:Lesser|Library) General '
            r'Public License '
            r'Version ([\d.]+) ((?:or later)?)', re.IGNORECASE),
        'LGPL',
        lambda t, m, l:
            'LGPL-{}{}'.format(
                m.group(1).rstrip('0.'),
                '+' if m.groups()[1] else '')
    ),
    ReLicense(
        re.compile(r'License:? LGPL[- v]*([\d.]*)(\+?)', re.IGNORECASE),
        'LGPL',
        lambda t, m, l:
            'LGPL{}'.format('-{}{}'.format(m.group(1).rstrip('0.'), m.group(2))
                            if m.groups()[0] else '')
    ),
    ReLicense(
        re.compile(r'Distributed under the LGPL\.'),
        'LGPL', None),
    ReLicense(
        re.compile(r'This (?:[^ ]+ ){,2}is free software licensed under the '
                   r'GNU LGPL(?:\.|->) You can find a copy of this license in '
                   r'LICENSE\.txt in the top directory of the source '
                   r'code(\.|->)', re.IGNORECASE),
        'LGPL', None),  # found in marble, actually lgpl-2.1+
    ReLicense(
        re.compile(
            r'This version of the GNU Lesser General Public License '
            r'incorporates the terms and conditions of version 3 of the '
            r'GNU General Public License, supplemented by the additional '
            r'permissions listed below.',
            re.IGNORECASE),
        'LGPL', lambda t, m, l: 'LGPL-3+'),
    # GFDL, GFDL-NIV
    ReLicense(
        re.compile(r'Permission is (?:hereby )?granted to copy, distribute '
                   r'and(?:/or)? modify this [^ ]+ under the terms of the GNU '
                   r'Free Documentation License', re.IGNORECASE),
        'GFDL', parse_gfdl),
    ReLicense(
        re.compile(
            r'[< ]legalnotice[> ][& ]?FDLNotice;[< ]/legalnotice[> ]',
            re.IGNORECASE),
        'GFDL', lambda t, m, l: 'GFDL-1.2+'),
    # LPPL
    ReLicense(
        re.compile(r'This [^ ]+ (?:(?:can|may)(?: be)?|is) (?:re)?distributed '
                   r'and(?:/or)? modified under the (?:terms|conditions) of '
                   r'the LaTeX Project Public License', re.IGNORECASE),
        'LPPL', parse_lppl),
    # MPL
    ReLicense(
        re.compile(
            r'Mozilla Public License,? (?:Version|v\.?) ?(\d+(?:\.\d+)?)',
            re.IGNORECASE),
        'MPL', lambda t, m, l: 'MPL-{}'.format(m.group(1))
    ),
    # Perl
    ReLicense(
        re.compile(r'you can redistribute it and/or modify it under the '
                   r'same terms as Perl', re.IGNORECASE),
        'GPL', lambda *a: 'GPL-1+'),
    ReLicense(
        re.compile(r'you can redistribute it and/or modify it under the '
                   r'same terms as Perl', re.IGNORECASE),
        'Artistic', lambda *a: 'Artistic-1'),
    # Python
    ReLicense(
        re.compile(r'PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))?',
                   re.IGNORECASE),
        'Python',
        lambda t, m, l:
            'Python{}'.format('-{}'.format(m.group(2))
                              if m.groups()[1] else
                              '')
    ),
    # QPL
    ReLicense(
        re.compile(r'This file is part of the .*Qt GUI Toolkit. This file may '
                   r'be distributed under the terms of the Q Public License '
                   r'as defined'),
        'QPL', None),
    ReLicense(
        re.compile(r'may (?:be distributed|redistribute it) under the terms '
                   r'of the Q Public License'),
        'QPL', None),
    # W3C
    ReLicense(
        re.compile(r'This [^ ]+ (?:(?:may|can)(?: be)?|is) (?:re)distributed '
                   r'under the (?:W3C®|W3C) Software License', re.IGNORECASE),
        'W3C', None),
    # Zlib
    ReLicense(
        re.compile(r'The origin of this software must not be misrepresented'
                   r'.*Altered source versions must be plainly marked as such'
                   r'.*This notice may not be removed or altered from any '
                   r'source distribution', re.IGNORECASE),
        'Zlib', None),
    ReLicense(
        re.compile(r'see copyright notice in zlib\.h', re.IGNORECASE),
        'Zlib', None),
    ReLicense(
        re.compile(r'This code is released under the libpng license',
                   re.IGNORECASE),
        'Zlib', None),
    # Zope
    ReLicense(
        re.compile(
            r'This [^ ]+ (?:(?:(?:can|may)(?: be)?|is) (?:re)?distributed '
            r'and(?:/or)? modified under the (?:terms|conditions)|is '
            r'subject to the provisions) of the Zope Public License',
            re.IGNORECASE),
        'ZPL', parse_zpl),
    # Other licenses not in:
    # https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
    ReLicense(
        re.compile(
            r'Licensed under the Academic Free License version 2\.0', re.IGNORECASE),
        'Academic', lambda *a: 'Academic-2.0'),
    ReLicense(
        re.compile(
            r'(?:(?:is free software.? )?you (?:can|may) (?:re)?distribute '
            r'(?:it|them) and(?:/or)? modify (?:it|them)|is (?:distributed|'
            r'licensed)) under the terms of (?:the )?(?:version [^ ]+ '
            r'(?:\(?only\)? )?of )?the (?:GNU )?'
            r'(?:Affero (?:GNU )?General Public License|AGPL)', re.IGNORECASE),
        'AGPL', parse_gpl),
    ReLicense(
        re.compile(
            r'(?:(?:is free software.? )?you (?:can|may) (?:re)?distribute '
            r'(?:it|them) and(?:/or)? modify (?:it|them)|is (?:distributed|'
            r'licensed)) under the terms of (?:the )?(?:GNU )?'
            r'(?:Affero (?:GNU )?General Public License|AGPL)', re.IGNORECASE),
        'AGPL', parse_gpl),
    ReLicense(
        re.compile(
            r'APPLE PUBLIC SOURCE LICENSE '
            r'Version ([\d.]+) - \w+ \d+, \d+ '
            r'Please read this License carefully before downloading this '
            r'software\. By downloading or using this software, you are '
            r'agreeing to be bound by the terms of this License\. If you do '
            r'not or cannot agree to the terms of this License, please do '
            r'not download or use the software.', re.IGNORECASE),
        'APSL', lambda t, m, l: 'APSL-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(
            r'This file contains Original Code and/or Modifications of '
            r'Original Code as defined in and that are subject to the Apple '
            r'Public Source License Version ([\d.]+) \(the \'?License[\' ]?\). '
            r'You may not use this file except in compliance with the '
            r'License\. Please obtain a copy of the License at '
            r'http[: ]/?/?www\.opensource\.apple\.com/apsl/? '
            r'and read it before using this file.', re.IGNORECASE),
        'APSL', lambda t, m, l: 'APSL-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(
            r'License: BSD Protection license', re.IGNORECASE),
        'BSD', lambda *a: 'BSD_Protection_License(non-dfsg)'),
    ReLicense(
        re.compile(
            r'Boost Software License[ .,-]+(Version (\d+(?:\.\d+)*)[ .,-]+?)?',
            re.IGNORECASE),
        'BSL',
        lambda t, m, l:
            'BSL{}'.format('-{}'.format(m.group(2))
                           if m.groups()[1] else
                           '')
    ),
    ReLicense(
        re.compile(r'Permission is hereby granted, free of charge, to any '
                   r'person or organization obtaining a copy of the software '
                   r'and accompanying documentation covered by this license '
                   r'\(the[" ]*Software[" ]?\)', re.IGNORECASE),
        'BSL', None),
    ReLicense(
        re.compile(r'THE BEER-WARE LICENSE', re.IGNORECASE),
        'Beerware', None),
    ReLicense(
        re.compile(r'under the terms of the CeCILL-([^ ]+) ', re.IGNORECASE),
        'CeCILL', lambda t, m, l: 'CeCILL-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(r'under the terms of the CeCILL ', re.IGNORECASE),
        'CeCILL', None),
    ReLicense(
        re.compile(
            r'The module is,?(?: [^ ]+){,5} licensed under(:? [^ ]+){,2} '
            r'CRYPTOGAMS(?: [^ ]+){,16} http[: ]/?/?www.openssl.org/[~ ]?appro/cryptogams',
            re.IGNORECASE),
        'CRYPTOGAMS', None),
    ReLicense(
        re.compile(r'Eclipse Public License - v ([\d.]+) '
                   r'THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF '
                   r'THIS ECLIPSE PUBLIC LICENSE \([" ]AGREEMENT[" ]\)\. ANY USE, '
                   r'REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES '
                   r'RECIPIENT[\' ]S ACCEPTANCE OF THIS AGREEMENT\.',
                   re.IGNORECASE),
        'EPL', lambda t, m, l: 'EPL-{}'.format(m.group(1).rstrip('0.'))
    ),
    ReLicense(
        re.compile(r'The (?:[^ ]+ ){,3}are published under the EPL ([\d.]+), '
                   r'see COPYING.TESTDATA for the full license', re.IGNORECASE),
        'EPL', lambda t, m, l: 'EPL-{}'.format(m.group(1).rstrip('0.'))
    ),
    ReLicense(
        re.compile(
            r'This file (?:is part of the FreeType project, and )?may only be used,? '
            r'modified,? and distributed under the terms of the FreeType project '
            r'license, LICENSE.TXT. By continuing to use, modify, or distribute '
            r'this file you indicate that you have read the license and '
            r'understand and accept it fully.', re.IGNORECASE),
        'FTL', None),
    ReLicense(
        re.compile(
            r'This license applies to all files distributed in the original '
            r'FreeType Project, including all source code, binaries and '
            r'documentation, unless otherwise stated in the file in its '
            r'original, unmodified form as distributed in the original archive. '
            r'If you are unsure whether or not a particular file is covered by '
            r'this license, you must contact us to verify this.', re.IGNORECASE),
        'FTL', None),
    ReLicense(
        re.compile(
            r'In lieu of a licence Fonts in this site are offered free for any '
            r'use; they may be installed, embedded, opened, edited, modified, '
            r'regenerated, posted, packaged and redistributed.', re.IGNORECASE),
        'fonts-ancient-scripts_license', None),
    ReLicense(
        re.compile(
            r'These conditions apply to any software derived from or based on the IJG '
            r'code, not just to the unmodified library. If you use our work, you ought '
            r'to acknowledge us.', re.IGNORECASE),
        'IJG', None),
    ReLicense(
        re.compile(r'Microsoft Permissive License \(Ms-PL\)', re.IGNORECASE),
        'MS-PL', None),
    ReLicense(
        re.compile(r'Permission to use, copy, modify and distribute this '
                   r'(?:[^ ]+ ){,3}(?:and its accompanying documentation )?'
                   r'for any purpose and without fee is hereby granted'
                   r'(?: in perpetuity)?, provided that the above copyright '
                   r'notice and this (?:paragraph|permission notice) appear '
                   r'in all copies\. The copyright holders make no '
                   r'representation about the suitability of (?:[^ ]+ ){,5}'
                   r'for any purpose\. It is provided "?as is"? without '
                   r'expressed or implied warranty\.',
                   re.IGNORECASE),
        'MIT/X11', lambda *a: 'MIT_OLIF'),
    ReLicense(
        re.compile(
            r'Subject to acceptance of the following conditions, permission '
            r'is hereby granted by Licensors without the need for written '
            r'agreement and without license or royalty fees, to use, copy, '
            r'modify and distribute this software for any purpose\. '
            r'The above copyright notice and the following four paragraphs '
            r'must be reproduced in all copies of this software and any '
            r'software including this software\.', re.IGNORECASE),
        'MIT/X11', lambda *a: 'MIT_versit'),
    ReLicense(
        re.compile(r'Permission is hereby granted, free of charge, to any '
                   r'person obtaining a copy of this software and(?:\/or)? '
                   r'associated documentation files \(the[" ]*'
                   r'(?:Software|Materials)[" ]?\), to deal in the '
                   r'(?:Software|Materials)',
                   re.IGNORECASE),
        'MIT/X11', parse_mit),
    ReLicense(
        re.compile(r'Permission to use, copy, modify, and distribute '
                   r'(?:[^ ]+ ){,3}(?:and its (:?accompanying )?documentation )?'
                   r'for any purpose and without fee is hereby granted',
                   re.IGNORECASE),
        'MIT/X11', parse_mit),
    ReLicense(
        re.compile(r'Permission is hereby granted, without written agreement '
                   r'and without license or royalty fees, to use, copy, '
                   r'modify, and distribute this software and its '
                   r'documentation for any purpose', re.IGNORECASE),
        'MIT/X11', parse_mit),
    ReLicense(
        re.compile(
            r'Permission is hereby granted, free of charge, to any member of '
            r'the KDE project \(the "?K Desktop Environment"? '
            r'http[ :]/?/?www.kde.org\) obtaining a copy of this "?Konqi '
            r'SDK"? package and associated documentation files \(the '
            r'"?Package[ "]?\), to deal in the Package without restriction, '
            r'including without limitation the rights to use, copy, modify, '
            r'merge, publish, distribute, sublicense, and/or sell copies of '
            r'the Package, and to permit persons to whom the Software is '
            r'furnished to do so, subject to the following conditions:? '
            r'The above copyright notice and this permission notice shall be '
            r'included in all copies or substantial portions of the Package.',
            re.IGNORECASE),
        'MIT/X11', lambda *a: 'Expat-like-Carddecks'),
    # as used in kf5, mmh
    ReLicense(
        re.compile(r'Distributed under terms of the MIT license\.',
                   re.IGNORECASE),
        'MIT/X11', lambda *a: 'Expat'),
    ReLicense(
        re.compile(
            r'Permission to use, copy, modify, distribute, and sell '
            r'this software and its documentation for any purpose is '
            r'hereby granted without fee, provided that the above '
            r'copyright notice appear in all copies and that both that '
            r'copyright notice and this permission notice appear in '
            r'supporting documentation', re.IGNORECASE),
        'MIT/X11', None),
    # as used by jquery
    ReLicense(
        re.compile(r'Released under the MIT license '
                   r'http[ :]/?/?jquery.org[ /]license', re.IGNORECASE),
        'MIT/X11', lambda *a: 'Expat'),
    ReLicense(
        re.compile(
            r'Permission to use, copy, modify, and distribute this software '
            r'for any purpose without fee is hereby granted, provided that '
            r'this entire notice is included in all copies of any software '
            r'which is or includes a copy or modification of this software '
            r'and in all copies of the supporting documentation for such '
            r'software.',
            re.IGNORECASE),
        'MIT/X11', lambda *a: 'ISC-like-dmgfp'),
    ReLicense(
        re.compile(
            r'it may be copied and furnished to others, and derivative works '
            r'that comment on or otherwise explain it or assist in its '
            r'implementation may be prepared, copied, published, and '
            r'distributed, in whole or in part, without restriction of any '
            r'kind, provided that the above copyright notice and this section '
            r'are included on all such copies and derivative works. '
            r'However, this document itself may not be modified in any way, '
            r'including by removing the copyright notice or references to '
            r'OASIS, except as needed for the purpose of developing any '
            r'document or deliverable produced by an OASIS Technical '
            r'Committee \(in which case the rules applicable to copyrights, as '
            r'set forth in the OASIS IPR Policy, must be followed\) or as '
            r'required to translate it into languages other than English. '
            r'The limited permissions granted above are perpetual and will '
            r'not be revoked by OASIS or its successors or assigns.',
            re.IGNORECASE),
        'OASIS', None),
    ReLicense(
        re.compile(
            r'Redistribution and use in source and binary forms, with or without '
            r'modification, are permitted provided that the following conditions '
            r'are met:? '
            r'1. Redistributions of source code must retain the above copyright '
            r'notice, this list of conditions and the following disclaimer. '
            r'2. Redistributions in binary form must reproduce the above copyright '
            r'notice, this list of conditions and the following disclaimer in '
            r'the documentation and/or other materials provided with the '
            r'distribution. '
            r'3. All advertising materials mentioning features or use of this '
            r'software must display the following acknowledgment:? '
            r'"?This product includes software developed by the OpenSSL Project '
            r'for use in the OpenSSL Toolkit. \(http[: ]/?/?www.openssl.org/\)"? '
            r'4. The names "?OpenSSL Toolkit"? and "?OpenSSL Project"? must not be used to '
            r'endorse or promote products derived from this software without '
            r'prior written permission. For written permission, please contact '
            r'(?:openssl-core|licensing)@openssl.org. '
            r'5. Products derived from this software may not be called "?OpenSSL"? '
            r'nor may "?OpenSSL"? appear in their names without prior written '
            r'permission of the OpenSSL Project. '
            r'6. Redistributions of any form whatsoever must retain the following '
            r'acknowledgment:? '
            r'"?This product includes software developed by the OpenSSL Project '
            r'for use in the OpenSSL Toolkit \(http[: ]/?/?www.openssl.org/\)"?',
            re.IGNORECASE),
        'OpenSSL', None),
    ReLicense(
        re.compile(
            r'The module is,?(?: [^ ]+){,5} licensed under OpenSSL',
            re.IGNORECASE),
        'OpenSSL', None),
    ReLicense(
        re.compile(
            r'This submission to OpenSSL is to be made available under the '
            r'OpenSSL license, and only to the OpenSSL project, in order to '
            r'allow integration into the publicly distributed code. '
            r'The use of this code, or portions of this code, or concepts '
            r'embedded in this code, or modification of this code and/or '
            r'algorithm\(s\) in it, or the use of this code for any other '
            r'purpose than stated above, requires special licensing.',
            re.IGNORECASE),
        'OpenSSL', None),
    ReLicense(
        re.compile(
            r'Rights for redistribution and usage in source and binary forms '
            r'are granted according to the OpenSSL license.', re.IGNORECASE),
        'OpenSSL', None),
    ReLicense(
        re.compile(
            r'This (?:[^ ]+ ){,2}is free (?:software|documentation); the (Free '
            r'Software Foundation|author[\( ]?s?\)?|copyright holders?) gives '
            r'unlimited permission to copy'
            r'(?: and(?:/or)? distribute it, with or without modifications, '
            r'as long as this notice is preserved\.'
            r'|, distribute and modify it\.)', re.IGNORECASE),
        'Permissive', lambda *a: 'FSF_unlimited'),
    ReLicense(
        re.compile(r'Copying and distribution of this [^ ]+, with or without '
                   r'modification, are permitted in any medium without royalty '
                   r'provided the copyright notice and this notice are '
                   r'preserved.', re.IGNORECASE),
        'Permissive', None),
    ReLicense(
        re.compile(r'This (?:[^ ]+ ){,2}may be copied and used freely without '
                   r'restrictions\.', re.IGNORECASE),
        'Permissive', None),
    ReLicense(
        re.compile(
            r'Copying and distribution of this file, with or without modification, '
            r'is permitted in any medium without royalty provided the copyright '
            r'notice and this notice are preserved.', re.IGNORECASE),
        'Permissive', None),
    ReLicense(
        re.compile(r'This source file is subject to version ([^ ]+) of the '
                   r'PHP license', re.IGNORECASE),
        'PHP', lambda t, m, l: 'PHP-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(r'You may copy this prolog in any way that is directly '
                   r'related to this document. For other use of this prolog, '
                   r'see your licensing agreement for Qt.', re.IGNORECASE),
        'QT_Prolog', None),
    ReLicense(
        re.compile(r'under the SGI Free Software License B', re.IGNORECASE),
        'SGI-FSLB', None),
    # https://lists.debian.org/debian-legal/2006/01/msg00566.html
    ReLicense(
        re.compile(
            r'Unicode, Inc\. hereby grants the right to freely use the '
            r'information supplied in this file in the creation of products '
            r'supporting the Unicode Standard, and to make copies of this '
            r'file in any form for internal or external distribution as long '
            r'as this notice remains attached\.', re.IGNORECASE),
        'Unicode', None),
    ReLicense(
        re.compile(r'Do What The Fuck You Want To Public License, Version '
                   r'([^, ]+)', re.IGNORECASE),
        'WTFPL', lambda t, m, l: 'WTFPL-{}'.format(m.group(1))
    ),
    ReLicense(
        re.compile(r'Do what The Fuck You Want To Public License',
                   re.IGNORECASE),
        'WTFPL', None),
    ReLicense(
        re.compile(r'(?:License WTFPL|Under (?:the|a) WTFPL)',
                   re.IGNORECASE),
        'WTFPL', None),
)


def find_licenses(text):
    '''Scan the text for presence of any of the supported licenses.

    The entries of LICENSES_RES are tried in order.  Once an entry of a
    given license family (its ``license`` field) has matched, the remaining
    entries of that family are skipped without being searched, so only the
    first match per family is reported.

    Args:
        text: The text to search; it is handed unchanged to each compiled
            regex and to the get_detail callables.

    Returns:
        A list of the license ids found in the text, one per matched
        family, in dict iteration order (order of first match on
        Python 3.7+).  When the matching entry has a get_detail callable
        the id is the value returned by get_detail(text, match, license),
        otherwise it is the family name itself.
    '''
    licenses = {}

    for license_re in LICENSES_RES:
        family = license_re.license
        # First match wins for each family.
        if family in licenses:
            continue
        match = license_re.re.search(text)
        if not match:
            continue
        license_ = family
        if license_re.get_detail:
            # Let the entry refine the generic family name into a specific id.
            license_ = license_re.get_detail(text, match, license_)
        licenses[family] = license_

    # Return a real list, as documented, rather than a dict view: a view
    # cannot be indexed and does not compare equal to a list.
    return list(licenses.values())
