2018-03-03 09:35:28 +00:00
|
|
|
|
---
|
|
|
|
|
created_at: '2015-06-14T07:21:10.000Z'
|
|
|
|
|
title: 'Perl Cannot Be Parsed: A Formal Proof (2008)'
|
|
|
|
|
url: http://www.perlmonks.org/?node_id=663393
|
|
|
|
|
author: __Joker
|
|
|
|
|
points: 57
|
|
|
|
|
story_text: ''
|
|
|
|
|
comment_text:
|
|
|
|
|
num_comments: 12
|
|
|
|
|
story_id:
|
|
|
|
|
story_title:
|
|
|
|
|
story_url:
|
|
|
|
|
parent_id:
|
|
|
|
|
created_at_i: 1434266470
|
|
|
|
|
_tags:
|
|
|
|
|
- story
|
|
|
|
|
- author___Joker
|
|
|
|
|
- story_9714046
|
|
|
|
|
objectID: '9714046'
|
2018-06-08 12:05:27 +00:00
|
|
|
|
year: 2008
|
2018-03-03 09:35:28 +00:00
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
sub halts {
|
|
|
|
|
my $machine = shift;
|
|
|
|
|
my $input = shift;
|
|
|
|
|
is_whatever_nullary(
|
|
|
|
|
qq{
|
|
|
|
|
run_turing_equivalent("\Q$machine\E", "\Q$input\E");
|
|
|
|
|
sub whatever() {};
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
is\_whatever\_nullary() can certainly easily return a true value without
|
|
|
|
|
having to execute run\_turing\_equivalent(). The whatever prototype is a
|
|
|
|
|
compile-time property and the run-time impact of whatever
|
|
|
|
|
run\_turing\_equivalent() does will not change the fact that whatever()
|
|
|
|
|
is "nullary" at compile time.
|
|
|
|
|
|
|
|
|
|
But it is quite obvious that Perl code can't be reliably parsed without
|
|
|
|
|
running Perl code by the simple example of:
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
BEGIN {
|
|
|
|
|
if( 0.5 < rand() ) {
|
|
|
|
|
eval "sub whatever() { }; 1" or die $@;
|
|
|
|
|
} else {
|
|
|
|
|
eval "sub whatever { }; 1" or die $@;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
whatever / 25 ; # / ; die "this dies!";
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
You can, of course, replace the above conditional with some
|
|
|
|
|
high-falutin' comp sci construct of your choosing. But I find that such
|
|
|
|
|
just detracts from the obviousness. You have to run "`0.5 < rand()`" in
|
|
|
|
|
order to parse the last line of the above Perl code (which perl can
|
|
|
|
|
parse differently each time that it is run, as shown below).
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
> perl -MO=Deparse above.pl
|
|
|
|
|
# ...
|
|
|
|
|
whatever / 25;
|
|
|
|
|
- syntax OK
|
|
|
|
|
|
|
|
|
|
> perl -MO=Deparse above.pl
|
|
|
|
|
# ...
|
|
|
|
|
whatever(/ 25 ; # /);
|
|
|
|
|
die 'this dies!';
|
|
|
|
|
- syntax OK
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
\- [tye](?node=tye)` `
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
sub halts {
|
|
|
|
|
my $machine = shift;
|
|
|
|
|
my $input = shift;
|
|
|
|
|
is_whatever_nullary(
|
|
|
|
|
qq{
|
|
|
|
|
BEGIN {
|
|
|
|
|
run_turing_equivalent("\Q$machine\E", "\Q$input\E");
|
|
|
|
|
sub whatever() {};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
Using *rand()* is indeed more obvious and it was how I tested my code
|
|
|
|
|
snippets (I lent my Halting Oracle to a friend and she never returned it
|
|
|
|
|
... but don't get me started.) I did not use it in the presentation
|
|
|
|
|
because while it makes the proof more obvious, it's a proof of a weaker
|
|
|
|
|
theorem.
|
|
|
|
|
|
|
|
|
|
If you interpret *rand()* as truly random (and not as pseudo-random),
|
|
|
|
|
then we're dealing with non-deterministic programs. Someone might then
|
|
|
|
|
say, "as long as there is no non-determinism while compiling, Perl 5 is
|
|
|
|
|
statically parseable." But it's not and a proof using the Turing machine
|
|
|
|
|
simulator shows it is not. That Perl 5 is unparseable even when the code
|
|
|
|
|
is completely deterministic is a much stronger result, and the
|
|
|
|
|
distinction makes a difference in practice.
|
|
|
|
|
|
|
|
|
|
The Turing simulation in this case is brought in for very practical
|
|
|
|
|
reasons. I can't claim the credit for that. Adam Kennedy's hint took me
|
|
|
|
|
in this direction. I suspect he also knew the business about *rand()*.
|
|
|
|
|
But with his many hours of real life experience trying to statically
|
|
|
|
|
parse Perl, he focused on the stronger proof -- the one that would give
|
|
|
|
|
him the most information about what he was up against in creating
|
|
|
|
|
[PPI](http://search.cpan.org/perldoc?PPI).
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
BEGIN {
|
|
|
|
|
my $proto= "";
|
|
|
|
|
$proto= "()"
|
|
|
|
|
if run_some_perl_code();
|
|
|
|
|
eval "sub whatever$proto { }";
|
|
|
|
|
}
|
|
|
|
|
whatever / 6; # /;
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
The fact that run\_some\_perl\_code() can be any arbitrary Perl code is
|
|
|
|
|
quite clear. I guess this then boils down to the argument that somehow
|
|
|
|
|
static analysis could be used to predict the results of
|
|
|
|
|
run\_some\_perl\_code() without actually running it. But the
|
|
|
|
|
[rand](?node=rand) example clearly shows that not to be the case. As
|
|
|
|
|
does using `if @ARGV` or `if <STDIN> =~ /^y/i` or `if
|
|
|
|
|
LWP::Simple::get("http://perlmonks.org/secretAlgorithm.pl") =~ /true/`
|
|
|
|
|
or `if unlink "/etc/passwd"` or `if find_prime_of_length( ~0 ) % 3
|
|
|
|
|
== 1`.
|
|
|
|
|
|
|
|
|
|
Sure, the fact that run\_some\_perl\_code() could be trying to solve the
|
|
|
|
|
halting problem also demonstrates something. Of course, if I'm writing
|
|
|
|
|
Perl support for an IDE, I'm not seriously concerned that the
|
|
|
|
|
declaration of function prototypes is being based on answers to
|
|
|
|
|
instances of the halting problem, so I'm likely to just dismiss your
|
|
|
|
|
proof (based on your own reaction to the [rand](?node=rand) case). I
|
|
|
|
|
find "You might have to run arbitrary Perl code" to be both a stronger
|
|
|
|
|
statement of the result and a more interesting and useful one.
|
|
|
|
|
|
|
|
|
|
I also think there is a flaw in your proof in that it confuses solving
|
|
|
|
|
the halting problem with running code that may or may not halt. So your
|
|
|
|
|
conclusion would need to be more like "Here is some code that will
|
|
|
|
|
either determine that whatever() is nullary or that will never
|
|
|
|
|
terminate." Since just because the code has been taking 400 years
|
|
|
|
|
doesn't mean that you can determine that it will never halt, so your
|
|
|
|
|
code doesn't even have the potential of solving the halting problem.
|
|
|
|
|
|
|
|
|
|
So the lemma that is needed here is that "Static analysis of a program
|
|
|
|
|
in a Turing-complete language (along with its 'input') can be
|
|
|
|
|
insufficient to determine its output". Frankly, this is where it becomes
|
|
|
|
|
uninteresting to me.
|
|
|
|
|
|
|
|
|
|
(Minor updates applied during the 15 minutes after first posting.)
|
|
|
|
|
|
|
|
|
|
\- [tye](?node=tye)` `
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
eval $ARGV[ 0 ];
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
Examine what is said, not who speaks -- Silence betokens consent -- Love
|
|
|
|
|
the truth but pardon error.
|
|
|
|
|
|
|
|
|
|
"Science is about questioning the status quo. Questioning authority".
|
|
|
|
|
|
|
|
|
|
In the absence of evidence, opinion is indistinguishable from prejudice.
|
|
|
|
|
|
|
|
|
|
["Too many \[\] have been sedated by an oppressive environment of
|
|
|
|
|
political correctness and risk
|
|
|
|
|
aversion."](http://news.bbc.co.uk/1/hi/education/6202877.stm)
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
my $x;
|
|
|
|
|
BEGIN { $x = time }
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
Unless the BEGIN block is executed both at compile time and once at
|
|
|
|
|
pre-run time.
|
|
|
|
|
|
|
|
|
|
Of course, I might be a few months out of date. I got bored following
|
|
|
|
|
all the Perl 6 stuff, so I'm working on
|
|
|
|
|
[Smalltalk](http://methodsandmessages.vox.com) for a while until Perl6
|
|
|
|
|
gets a bit closer to a release.
|
|
|
|
|
|
|
|
|
|
\-- [Randal L. Schwartz, Perl hacker](http://www.stonehenge.com/merlyn/)
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
multi sub prefix:<foo> (Str $a) {
|
|
|
|
|
return $a eq "foo" ?? "bar" !! "foo";
|
|
|
|
|
}
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
This will define an unary prefix operator that takes one string as an
|
|
|
|
|
argument. When somebody writes
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
foo "foo";
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
You can't know if that's a method call or a call to an operator. You'd
|
|
|
|
|
think it makes no difference - but wait until precedence comes into the
|
|
|
|
|
play:
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
BEGIN {
|
|
|
|
|
eval
|
|
|
|
|
"
|
|
|
|
|
multi * prefix:<foo> (Str $a) is tighter(&infix:<~>) {
|
|
|
|
|
return $a eq 'foo' ?? 'bar' !! 'foo';
|
|
|
|
|
";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# and later in the code:
|
|
|
|
|
foo "fo" ~ "o";
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
If that `foo` parsed as a sub call the precedence is lower than that of
|
|
|
|
|
the concatenation `~`, and the other way round if it's parsed as an
|
|
|
|
|
operator.
|
|
|
|
|
|
|
|
|
|
This is quite a complicated example, but there are much easier ones with
|
|
|
|
|
"real" macros. (But since no implemenation really implements macros by
|
|
|
|
|
now, I don't really know much about them).
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
foo =+= bar //|/ baz =+= qux
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
The parsing of this expression depends on the relative precedence of the
|
|
|
|
|
two operators and possibily, but not necessarily, on the associativity
|
|
|
|
|
of =+= It may also result in an error, if //|/ has higher precedence and
|
|
|
|
|
=+= is declared as non-associative.
|
|
|
|
|
|
|
|
|
|
However, the parsing of this does not depend on executing any code in
|
|
|
|
|
the sense that the OP means, I think. That is, there must be a static
|
|
|
|
|
declaration somewhere else in the code that says what the properties of
|
|
|
|
|
these two operators should be. It need not lexically precede the usage,
|
|
|
|
|
but it has to appear somewhere in the relevant scope, and it cannot be
|
|
|
|
|
dynamically generated in any way. How would that fit in?
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
if (is_nullary(whatever))
|
|
|
|
|
{
|
|
|
|
|
whatever / 25;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
whatever (/ 25 ; # /);
|
|
|
|
|
die "this dies!";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
When we think about it, the Perl interpreter already does this. So
|
|
|
|
|
technicaly, parsing Perl is not a undecidable problem. You only proved
|
|
|
|
|
that there could not be a single way to parse Perl - and yes, it is a
|
|
|
|
|
problem for text editor's syntax-highlighting.
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
import sys
|
|
|
|
|
import parser
|
|
|
|
|
|
|
|
|
|
print parser.suite(open(__file__).read()).totuple()
|
|
|
|
|
|
|
|
|
|
x = eval(sys.stdin.readline())
|
|
|
|
|
|
|
|
|
|
if x:
|
|
|
|
|
print "yes", x
|
|
|
|
|
else:
|
|
|
|
|
print "no", x
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
To bring this back to the original post, the "parser" module is the
|
|
|
|
|
Python equivalent of what Parse::Perl would be if Perl parsing were
|
|
|
|
|
possible.
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
BEGIN {
|
|
|
|
|
x();
|
|
|
|
|
sub foo { }
|
|
|
|
|
}
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
then foo() will be declared only when x() terminates.
|
|
|
|
|
|
|
|
|
|
But that's like writing this Java code
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
for(;;) { }
|
|
|
|
|
int x;
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
and asking whether "x" will ever be declared, which says absolutely
|
|
|
|
|
nothing about how this program is parsed (and it's parsed fine).
|
|
|
|
|
|
|
|
|
|
Cheers.
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|
|
|
|
|
|
|
|
|
|
``` code
|
|
|
|
|
use Modern::Perl;
|
|
|
|
|
|
|
|
|
|
sub x { say "In x!" }
|
|
|
|
|
|
|
|
|
|
BEGIN { x() }
|
|
|
|
|
|
|
|
|
|
sub foo!bar { say 'In foo!bar!' }
|
|
|
|
|
|
|
|
|
|
BEGIN { foo!bar() }
|
|
|
|
|
[download]
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
For extra fun, run it through
|
|
|
|
|
[B::Deparse](http://search.cpan.org/search?mode=module&query=B%3A%3ADeparse).
|
|
|
|
|
|
|
|
|
|
<!-- end list -->
|