#!/usr/bin/perl

sub shift_ADD ($$;$) { # The shift-add a.k.a.
                       # the Baeza-Yates k-mismatches.

    my $maxbits =  32; # Maximum pattern length.
    my $Sigma   = 256; # Assume 8-bit text.

    my ( $T, $P, $k ) = @_; # The text, the pattern,
                            # and the maximum mismatches.

    # Sanity checks.

    my $n = length( $T );

    $k = int( log( $n ) + 1 ) unless defined $k; # O(n lg n)
    return index( $T, $P ) if $k == 0; # The fast lane.

    my $m = length( $P );

    return index( $T, $P ) if $m == 1; # Another fast lane.

    die "pattern '$P' longer than $maxbits\n" if $m > $maxbits;
    return -1 if $m > $n;
    return  0 if $m == $n and $P eq $T;

    # Preprocess.

    # We need ceil( log ( k+1 ) ) + 1 bits wide counters.
    #                  2
    # The 1.4427 approximately equals 1 / log(2).
    my $bits = int ( 1.4427 * log( $k + 1 ) + 0.5) + 1;
    if ( $m * $bits > $maxbits ) {
        warn "mismatches $k too much for the pattern '$P'\n";
        die "maximum ", $maxbits / $m / $bits, "\n";
    }

    use integer;

    my ( $mask, $ovmask ) = ( 1 << ( $bits - 1 ), 0 );
    my ( $i, @table );

    # Initialize the $ovmask for masking out the counter overflows.
    # Also the $mask gets shifted to its rightful place.
    for ( $i = 0; $i < $m; $i++ ) {
        $ovmask |= $mask;
        $mask <<= $bits; # The $m * $bits lowest bits will end up 0.
    }
    # Now every ${bits}th bit of $ovmask is 1.
    # For example if $bits == 3, $ovmask is ...100100100.

    $table[ 0 ] = $ovmask >> ( $bits - 1 ); # Initialize table[0].
    # Copy initial bits to table[1..].
    for ( $i = 1; $i < $Sigma; $i++ ) {
        $table[ $i ] = $table[ 0 ];
    }
    # Now all counters at all @table entries are initialized to 1.
    # For example if $bits == 3, @table entries are ..001001001.

    # The counters corresponding to the characters of $P are zeroed.
    # (Note that $mask now begins a new life.)
    for ( $i = 0, $mask = 1 ; $i < $m; $i++, $mask <<= $bits ) {
        $table[ ord( substr( $P, $i, 1 ) ) ] &= ~$mask;
    }

    # Search.

    $mask     = ( 1 << ( $m * $bits) ) - 1;
    my $state = $mask & ~$ovmask;
    my $ov    = $ovmask; # The $ov will record the counter overflows.
    # Match is possible only if $state doesn't contain these bits.
    my $watch = ( $k + 1 ) << ( $bits * ( $m - 1 ) );

    for ( $i = 0; $i < $n; $i++ ) {
        $state =                           # Advance the state.
            ( ( $state << $bits ) +        # The 'Shift' and the 'ADD'.
            $table[ ord( substr( $T, $i, 1 ) ) ] ) & $mask;
        $ov =                              # Record the overflows.
            ( ( $ov << $bits ) |
              ( $state & $ovmask) )                & $mask;
        $state &= ~$ovmask;                # Clear the overflows.
        if ( ( $state | $ov ) < $watch ) { # Check for match.
            # We have a match with
            # $state >> ( $bits * ( $m - 1 ) ) ) mismatches.
            return $i - $m + 1; # Match.
        }
    }

    return -1; # Mismatch.
}
