PHP Linkifier

Here’s a nifty bit of code I wrote a while back (utilising a function from PHP SmartyPants) that takes a bunch of HTML, searches it for a word or phrase that you specify, and turns every occurrence of it into a link, unless it’s already inside a link!

# NOTE(review): the definitions of the four constants used by _tokenize()
# are not visible in this listing. The guarded fallbacks below only take
# effect when a constant is not already defined. 'text' and 'tag' follow the
# token description in the function comment; the loop cap and the advisory
# flag are placeholder values -- confirm them against the original source.
defined('TOKENS_TYPE_TEXT') || define('TOKENS_TYPE_TEXT', 'text');
defined('TOKENS_TYPE_TAG') || define('TOKENS_TYPE_TAG', 'tag');
defined('MAX_TOKENIZER_LOOPS') || define('MAX_TOKENIZER_LOOPS', 100000);
defined('ADVISE_TOKENIZER_FAILURE') || define('ADVISE_TOKENIZER_FAILURE', TRUE);

function _tokenize($str, &$tokens)
{
#
# Parameters: $str    - string containing HTML markup.
#             $tokens - array, passed by reference, that the tokens are
#                       appended to.
# Returns:    Nothing; the result is delivered through $tokens.
#
# Each token is either a tag (possibly with nested tags contained
# therein, such as <a href="<MTFoo>">), or a
# run of text between tags. Each element of the array is a
# two-element array: 'type' is either TOKENS_TYPE_TAG or TOKENS_TYPE_TEXT;
# 'body' is the actual value.
#
# Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#

    $len = strlen($str);

    // Build a pattern that matches a tag containing up to $depth levels of
    // nested <...> pairs.
    $depth = 6;
    $nested_tags = str_repeat('(?:<(?:[^<>]|', $depth);
    $nested_tags = substr($nested_tags, 0, -1); // drop the trailing '|'
    $nested_tags .= str_repeat(')*>)', $depth);

    // Extended (/x) pattern, so the spaces inside it are insignificant.
    $match = '/'
        . '(?s: <! (?: -- .*? -- \s* )+ > ) | ' // comment
        . '(?s: <\? .*? \?> ) | '               // processing instruction
        . $nested_tags                          // ordinary, possibly nested, tag
        . '/x';

    $cursor = 0; // index of the first character not yet assigned to a token
    $loops = 0;

    // Search from $cursor using preg_match's offset argument, so reported
    // positions are absolute and the subject is not copied on every pass.
    while (preg_match($match, $str, $hits, PREG_OFFSET_CAPTURE, $cursor)) {

        $extracted_tag = $hits[0][0];  // the full HTML tag
        $tag_start = (int)$hits[0][1]; // position of the tag in $str

        // if this tag isn't next to the previous one, store the interstitial text
        if ($tag_start > $cursor) {
            $tokens[] = array('type' => TOKENS_TYPE_TEXT,
                              'body' => substr($str, $cursor, $tag_start - $cursor));
        }

        $tokens[] = array('type' => TOKENS_TYPE_TAG,
                          'body' => $extracted_tag);

        // Resume after the END of this tag. Resuming one character after its
        // start would re-match anything tag-like inside it (nested tags,
        // markup inside comments or processing instructions).
        $cursor = $tag_start + strlen($extracted_tag);

        // Safety net against runaway tokenizing.
        if ($loops++ > MAX_TOKENIZER_LOOPS) {

            if (ADVISE_TOKENIZER_FAILURE) {
                print "SmartyPants _tokenize failure.";
            }
            return;
        }
    }

    // if text remains after the close of the last tag, grab it
    if ($cursor < $len) {
        $tokens[] = array('type' => TOKENS_TYPE_TEXT,
                          'body' => substr($str, $cursor));
    }

    return;

}

/**
 * Turn every whole-word occurrence of $word in an HTML string into a link,
 * leaving text that is already inside an <a> element untouched.
 *
 * @param string $string        HTML to process.
 * @param string $word          Word or phrase to link; matched literally.
 * @param string $link          Target URL. When empty, an English Wikipedia
 *                              URL is built from ucfirst($word).
 * @param bool   $casesensitive TRUE to match case-sensitively; the default
 *                              is a case-insensitive match.
 * @return string The HTML with the links inserted.
 */
function linkity($string, $word, $link = '', $casesensitive = FALSE)
{
    if ($link == '') {
        $link = 'http://en.wikipedia.org/wiki/' . ucfirst($word);
    }

    // preg_quote() keeps characters such as '.', '+' or the '#' delimiter in
    // $word from being interpreted as regex syntax.
    $regexp = '#\b(' . preg_quote($word, '#') . ')\b#' . ($casesensitive ? '' : 'i');
    $replacement = '<a href="' . $link . '">$1</a>';

    $inlink = FALSE; // TRUE while between an opening <a ...> and its </a>
    $out = '';

    $tokens = array();
    _tokenize($string, $tokens);

    foreach ($tokens as $t) {
        if ($t['type'] == TOKENS_TYPE_TAG) {
            // Tags are copied through verbatim; they only update the
            // "inside a link" state.
            if (preg_match('#^<a[\s>]#i', $t['body'])) {
                $inlink = TRUE;
            } elseif (preg_match('#^</a\s*>#i', $t['body'])) {
                $inlink = FALSE;
            }
            $out .= $t['body'];
        } elseif ($inlink) {
            // Text that is already part of a link is left alone.
            $out .= $t['body'];
        } else {
            $out .= preg_replace($regexp, $replacement, $t['body']);
        }
    }

    return $out;
}

// Demo: print the sample text, then the same text with "car" linked.
// The word boundaries in linkity() keep "carburettor" from being linked.
$str = 'Let\'s go to my car. Tomorrow I\'ll have to buy
a new car because there is a problem with the carburettor.';

print "$str\n";
print linkity($str, 'car')."\n";

?>