class Link(object):
    """Represents an HTML link including a linker command.

    Typically, the Link is constructed from an SRE_Match after applying the
    provided Link.regex pattern to the HTML content of a content object.

    A linker command is written in curly braces directly after the opening
    quote of a link attribute, e.g. ``href="{mailto}webmaster"``.
    """

    # regex based on the one used in contents.py from pelican version 3.6.3
    #
    # The group names below are exactly the ones __init__ reads via
    # match.group(...); the closing quote is matched with a *named*
    # back-reference so the pattern does not depend on group numbering.
    regex = re.compile(
        r"""
        # EXAMPLE: <a rel="nofollow" href="{mailto}webmaster"
        (?P<markup><\s*[^\>]*   # <a rel="nofollow" href=   --> markup
            (?:href|src|poster|data|cite|formaction|action)\s*=)
        (?P<quote>["\'])        # opening quote             --> quote
        \{(?P<cmd>.*?)\}        # {mailto}                  --> cmd
        (?P<url>.*?)            # webmaster                 --> __url (see path)
        (?P=quote)              # closing quote             <-- quote
        """, re.X)

    def __init__(self, context, content_object, match):
        """Construct a Link from an SRE_Match.

        :param context: The shared context between generators.
        :param content_object: The associated pelican.contents.Content.
        :param match: An SRE_Match obtained by applying the regex to my content.
        """
        self.context = context
        self.content_object = content_object

        # Pieces of the matched text, kept so html_code() can rebuild it.
        self.markup = match.group('markup')  # everything up to and incl. '='
        self.quote = match.group('quote')    # the quote character used
        self.cmd = match.group('cmd')        # linker command without braces

        # The parsed URL is kept private; only its path component is meant
        # to be modified (through self.path) before the link is rebuilt.
        self.__url = urlparse(match.group('url'))
        self.path = self.__url.path

    def href(self):
        """Return the matched URL, rebuilt with the current ``self.path``.

        All other URL components (scheme, netloc, params, query, fragment)
        are taken unchanged from the originally matched URL.
        """
        return urlunparse(self.__url._replace(path=self.path))

    def html_code(self):
        """Return the matched HTML fragment rebuilt from this Link.

        The result is ``markup + quote + href() + quote``; the linker command
        (``{cmd}``) is not part of the rebuilt fragment.
        """
        return ''.join((self.markup, self.quote, self.href(), self.quote))