sedition code
<pre>
#!/usr/bin/perl -w
use strict;
#=====================================================================
# OUTLINE
#=====================================================================
# Grammar Rules for DFA definitions:
# a definition/grammar is given as a scalar and looks like this:
# STATE( TEST_VALUE => TRANS_STATE, TEST_VALUE => TRANS_STATE ),
# STATE2( TEST_VALUE => TRANS_STATE, TEST_VALUE => TRANS_STATE ),
# "State" is a letter + number(s) combination to name the given state.
# "Trans_state" is the transition state where the machine arrives when
# given the transition state's "Test_value." The unique test values
# make up the machines language. The unique states make up the
# machine's states (they can be redefined). The first state in the
# definition will be the machine's start state. Capital letter states
# are accept states. The syntax looks like perl grammar but it is not!
# So...
# this: "m1( 0 => m1, 1 => m2 ), M2( 0 => m1, 1 => m2 )"
# is a simple 2 state machine. "m1" is the start state because it is
# given first (and not rescinded). "m2" is the accept state because
# it's CAPITALIZED (M2 and m2 are equivalent, the capital is only
# syntax to show that it is an accept state). The set containing 0 and
# 1 is the machine's language because that is all given test values. A
# flow chart of our machine looks like this:
# ______ 1 ====== ___
# --->| |------->|| ||/ \
# 0/ | m1 | || m2 || / 1
# \___/| |<-------|| ||<---
# ------ 0 ======
#=====================================================================
# PROGRAM PROPER
#=====================================================================
# this is a definition for a word-ish state machine. "1"s are
# consonants and "0"s are vowels. sorry there's no diagram. basically
# it requires there is at least one vowel (it has several accept
# states, capital "M"s) and that there are never 4 vowels or 5
# consonants in a row. accept means the input looks like a word. for
# instance, 1001 looks like a word b/c it could be "seat" or "zoot."
# it could also be "muup" but hey, it's a pretty simple machine.
my $definition = q{
m1 ( 0 => m6, 1 => m2 ), # start state
m2 ( 0 => m6, 1 => m3 ),
m3 ( 0 => m6, 1 => m4 ),
m4 ( 0 => m6, 1 => m5 ),
m5 ( 0 => m6, 1 => m14 ),
M6 ( 0 => m11, 1 => m7 ),
M7 ( 0 => m6, 1 => m8 ),
M8 ( 0 => m6, 1 => m9 ),
M9 ( 0 => m6, 1 => m10 ),
m10( 0 => m6, 1 => m14 ),
M11( 0 => m12, 1 => m7 ),
m12( 0 => m13, 1 => m7 ),
m13( 0 => m13, 1 => m13 ), # dead end -- 4 vowels in a row
m14( 0 => m14, 1 => m14 ), # dead end -- 5 consonants in a row
};
my $input_tape = shift || die "Give me some input.\n";
my $original = $input_tape;
$input_tape =~ s/[b-df-hj-np-tv-z]/1/ig; # change consonants to 1s
$input_tape =~ s/[aeiou]/0/ig; # vowels to zeros
$input_tape =~ s/\D//g; # collapse the rest
my @input_tape_queue = split '', $input_tape;
my $dfa = make_dfa($definition);
$dfa->($_) for @input_tape_queue; # send input through machine
printf
qq|Final state for "$original" is %s. %s.\n|,
$dfa->('state'),
$dfa->() ? "Looks like a word" : "There's no way that's a word";
exit 0;
#=====================================================================
# SUBROUTINES
#=====================================================================
sub make_dfa {
my %DFA;
my $def = shift;
$def =~ s/#[^\n]*\n//g; # hard kill comments
$def =~ s/\s+//g; # kill spacing
# catch rules as "m1(q1 => 1, q2 => 3)" into $1 [m2] and $2 [(q1..)]
while ( $def =~ m!([a-zA-Z]+\d+)\(([^)]+)\),?!g ) {
my $state = lc $1;
my @rules = split/,/, $2;
$DFA{$state} = [ @rules ];
# check for duplicates later and report redefined
push @{$DFA{'states'}}, $state;
# capital state name means it's an accept state
$DFA{'accept_state'}{$state} = 1 if $1 =~ /[A-Z]/;
}
# dupes?
my %tmp_counter;
for ( @{$DFA{'states'}} ) {
next unless $tmp_counter{$_}++; # next unless seen before
warn qq|State "$_" redefined in DFA definition!\n|;
}
my $tests = {};
foreach my $state ( @{$DFA{'states'}} ) {
# $code is used to build-up test subs for our closure to use
my $code = q{
my $input = shift;
};
foreach my $sub_test ( @{$DFA{$state}} ) {
my ( $test_value, $return ) = split(/=>/, $sub_test);
$code .= qq{ return '$return'
if \$input == $test_value;\n };
$DFA{'alphabet'}{$test_value} ||= 1;
}
$tests->{$state} = eval "sub { $code }";
}
# start state (1st state from DFA def) which will be remembered as
# a closure as long as the DFA lives
my $state = $DFA{'states'}[0];
# we're ready to return our machine
return
sub {
my $input = shift;
if ( lc $input eq 'state' )
{
# user requested to know what named state the machine is currently in
return $state;
}
elsif ( lc $input eq 'alphabet' )
{
# user requested to know what the acceptable language/input is
return sort keys %{ $DFA{'alphabet'} };
}
elsif ( defined $input
and exists $DFA{'alphabet'}{$input} )
{
# there's acceptable input (in its alphabet) to machine
$state = $tests->{$state}->( $input );
# now we test if state arrived at is an accept state and return true
# or false
}
return exists $DFA{'accept_state'}{$state} ? 1 : undef;
};
}
# Possibilities for a fuller implementation:
# automatic grammar check
# * states should not be redefined or warn
# * there should be >= 1 accept state or warn
# and you should be able to add or subtract grammar on the fly
</pre>
<pre>
Usage
jinx[13]>automata-grammar
Give me some input.
jinx[14]>automata-grammar asdsdfaposidfuasdkn
Final state for "asdsdfaposidfuasdkn" is m14.
There's no way that's a word.
jinx[15]>automata-grammar bookkeeper
Final state for "bookkeeper" is m7.
Looks like a word.
jinx[16]>automata-grammar fiddle-dee-dee
Final state for "fiddle-dee-dee" is m11.
Looks like a word.
</pre>