From 7506577e3734e011e3ee29e6e4fe8070cc05ef24 Mon Sep 17 00:00:00 2001 From: John Mager Date: Sat, 20 Jun 2020 20:24:45 -0400 Subject: [PATCH] Protects some sequences from being ligaturized --- Scripts/features.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Scripts/features.py b/Scripts/features.py index 38c9583..bbab79b 100644 --- a/Scripts/features.py +++ b/Scripts/features.py @@ -59,6 +59,16 @@ def rule(liga): rules.append(ignore([liga[-2]], liga[0], liga[1:])) rules.append(ignore(head=liga[0], suffix=(liga[1:] + [liga[1]]))) + # Don't cut into `prefix` to complete a ligature. + # i.e. regex `(?=`> is not `(?`=>. + rules.extend( + [ + ignore(prefix[:-n], liga[0], liga[1:]) + for prefix in ignore_prefixes + for n in range(1, len(liga)) + if prefix[-n:] == liga[:n] + ] + ) # hardcoded ignores, i.e. `<||>` rules.extend(ignores[tuple(liga)]) @@ -198,5 +208,17 @@ ignores = defaultdict( ) +ignore_prefixes = [ + ["parenleft", "question", "colon"], + # Regexp lookahead/lookbehind + ["parenleft", "question", "equal"], + ["parenleft", "question", "less", "equal"], + ["parenleft", "question", "exclam"], + ["parenleft", "question", "less", "exclam"], + # PHP