summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorNami-Doc <vendethiel@hotmail.fr>2014-09-13 21:37:35 +0200
committerNami-Doc <vendethiel@hotmail.fr>2014-09-13 21:37:35 +0200
commit931db8c6857ddbf47f4d2a6cb7d905f61cb61e84 (patch)
treeb5f2c3a0e486db830bb5ac5e4bad0f3780dd2dc9
parent032557dc46b4776e68d689b6a7344bf8bcfec65c (diff)
Regexes thingies.
-rw-r--r--perl6.html.markdown52
1 files changed, 49 insertions, 3 deletions
diff --git a/perl6.html.markdown b/perl6.html.markdown
index fe5b197c..4e7d8c6e 100644
--- a/perl6.html.markdown
+++ b/perl6.html.markdown
@@ -1246,7 +1246,7 @@ so 'abc' ~~ / a b* c /; # `True`
so 'abbbbc' ~~ / a b* c /; # `True`
so 'aec' ~~ / a b* c /; # `False`. "b"(s) are optional, not replaceable.
-# - `**` - "Quantify It Yourself".
+# - `**` - (Unbound) Quantifier
# If you squint hard enough, you might understand
# why exponentiation is used for quantity.
so 'abc' ~~ / a b ** 1 c /; # `True` (exactly one time)
@@ -1255,6 +1255,27 @@ so 'abbbc' ~~ / a b ** 1..3 c /; # `True`
so 'abbbbbbc' ~~ / a b ** 1..3 c /; # `False` (too much)
so 'abbbbbbc' ~~ / a b ** 3..* c /; # `True` (infinite ranges are okay)
+# - `<[]>` - Character classes
+# Character classes are the equivalent of PCRE's `[]` classes, but
+# they use a more perl6-ish syntax:
+say 'fooa' ~~ / f <[ o a ]>+ /; #=> 'fooa'
+# You can use ranges:
+say 'aeiou' ~~ / a <[ e..w ]> /; #=> 'ae'
+# Just like in normal regexes, if you want to use a special character, escape it
+# (the last one is escaping a space)
+say 'he-he !' ~~ / 'he-' <[ a..z \! \ ]> + /; #=> 'he-he !'
+# You'll get a warning if you put duplicate characters
+# (which has the nice effect of catching wrong quoting):
+'he he' ~~ / <[ h e ' ' ]> /; # Warns "Repeated characters found in characters class"
+
+# You can also negate them ... (equivalent to `[^]` in PCRE)
+so 'foo' ~~ / <-[ f o ]> + /; # False
+
+# ... and compose them:
+so 'foo' ~~ / <[ a..z ] - [ f o ]> + /; # False (any letter except f and o)
+so 'foo' ~~ / <-[ a..z ] + [ f o ]> + /; # True (no letter except f and o)
+so 'foo!' ~~ / <-[ a..z ] + [ f o ]> + /; # True (the + doesn't replace the left part)
+
## Grouping and capturing
# Group: you can group parts of your regexp with `[]`.
# These groups are *not* captured (like PCRE's `(?:)`).
@@ -1297,7 +1318,7 @@ say $0.WHAT; #=> (Array)
# may it be a range or a specific value (even 1).
# If you're wondering how the captures are numbered, here's an explanation:
-TODO use graphs from s05
+# (TODO use graphs from s05)
## Alternatives - the `or` of regexps
@@ -1305,6 +1326,31 @@ TODO use graphs from s05
so 'abc' ~~ / a [ b | y ] c /; # `True`. Either "b" or "y".
so 'ayc' ~~ / a [ b | y ] c /; # `True`. Obviously enough ...
+# The difference between this `|` and the one you're probably used to is LTM.
+# LTM means "Longest Token Matching". This means that the engine will always
+# try to match as much as possible in the string.
+'foo' ~~ / fo | foo /; # `foo`, because it's longer.
+# To decide which part is the "longest", it first splits the regex in two parts:
+# The "declarative prefix" (the part that can be statically analyzed)
+# and the procedural parts.
+# Declarative prefixes include alternations (`|`), conjunctions (`&`),
+# sub-rule calls (not yet introduced), literals, character classes and quantifiers.
+# The latter include everything else: back-references, code assertions,
+# and other things that can't traditionally be represented by normal regexps.
+#
+# Then, all the alternatives are tried at once, and the longest wins.
+# Examples:
+# DECLARATIVE | PROCEDURAL
+/ 'foo' \d+ [ <subrule1> || <subrule2> ] /;
+# DECLARATIVE (nested groups are not a problem)
+/ \s* [ \w & b ] [ c | d ] /;
+# However, closures and recursion (of named regexps) are procedural.
+# ... There are also more complicated rules, like specificity
+# (literals win over character classes)
+
+# Note: the first-matching `or` still exists, but is now spelled `||`
+'foo' ~~ / fo || foo /; # `fo` now.
+
### Extra: the MAIN subroutine
# The `MAIN` subroutine is called when you run a Perl 6 file directly.
# It's very powerful, because Perl 6 actually parses the argument
@@ -1317,7 +1363,7 @@ sub MAIN($name) { say "Hello, you !" }
# t.pl <name>
# And since it's a regular Perl 6 sub, you can haz multi-dispatch:
-# (using a "Bool" for the named argument so that we get `--replace`
+# (using a "Bool" for the named argument so that we can do `--replace`
# instead of `--replace=1`)
subset File of Str where *.IO.d; # convert to IO object to check the file exists