Here's a nifty bit of code I wrote a while back (utilising a function from PHP SmartyPants) that takes a bunch of HTML, searches it for a word or phrase that you specify, and turns every occurrence of it into a link, unless that occurrence is already inside a link!


<?php

/*
 * Safety valve for the tokenizer: the maximum number of passes its matching
 * loop may make before it stops early. Raising it lets longer documents
 * through, but also lets the system churn longer if a looping bug (or a
 * really, really long input string) ever shows up.
 */
define('MAX_TOKENIZER_LOOPS', 2000);

/* Whether the tokenizer prints a message when it hits the loop limit. */
define('ADVISE_TOKENIZER_FAILURE', false);

/* Values stored under the 'type' key of every entry in the $tokens list. */
define('TOKENS_TYPE_TEXT', 'text');
define('TOKENS_TYPE_TAG',  'tag');

/**
 * Split a string of HTML into an ordered list of tag and text tokens.
 *
 * Every token appended to $tokens is an array with two keys:
 *   'type' => TOKENS_TYPE_TAG or TOKENS_TYPE_TEXT
 *   'body' => the exact substring of $str that the token covers
 *
 * A "tag" is an HTML comment (<!-- ... -->), a <? ... ?> block, or an
 * angle-bracketed tag, which may itself contain tags up to six levels deep
 * (such as <a href="<MTFoo>">). Everything between two tags is "text".
 * Tokens never overlap and empty text tokens are never emitted, so joining
 * the bodies in order gives back $str.
 *
 * Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
 *     <http://www.bradchoate.com/past/mtregex.php>
 *
 * @param string $str    HTML markup to split (by reference; never modified).
 * @param array  $tokens Receives the result; new tokens are appended to it.
 * @return void
 */
function _tokenize(&$str, &$tokens) {

    $len = strlen($str);

    // Build a pattern for a tag that may contain other tags, $depth levels deep.
    $depth = 6;
    $nested_tags = str_repeat('(?:<(?:[^<>]|', $depth);
    $nested_tags = substr($nested_tags, 0, -1);     // innermost level has no alternative
    $nested_tags .= str_repeat(')*>)', $depth);

    $match = "/(?s: <! ( -- .*? -- \s* )+ > ) |
               (?s: <\? .*? \?> ) |
               $nested_tags/x";

    $offset = 0;    // byte position of the first character not yet tokenized
    $loops = 0;

    // Search from $offset via preg_match's own offset argument: no substring
    // copy per iteration, and reported positions are relative to $str itself.
    while ($offset < $len
           && preg_match($match, $str, $hits, PREG_OFFSET_CAPTURE, $offset)) {

        $extracted_tag = $hits[0][0];       // contains the full HTML tag
        $tag_start = (int)$hits[0][1];      // position of the tag within $str

        // if this tag isn't next to the previous one, store the interstitial text
        if ($tag_start > $offset) {
            $tokens[] = array('type' => TOKENS_TYPE_TEXT,
                              'body' => substr($str, $offset, $tag_start - $offset));
        }

        $tokens[] = array('type' => TOKENS_TYPE_TAG,
                          'body' => $extracted_tag);

        // Resume AFTER the whole tag, so markup nested inside it (or inside a
        // comment) is not matched a second time as a separate token.
        $offset = $tag_start + strlen($extracted_tag);

        if ($loops++ > MAX_TOKENIZER_LOOPS) {

            if (ADVISE_TOKENIZER_FAILURE) {
                print "SmartyPants _tokenize failure.";
            }
            // Stop tokenizing, but fall through so the unprocessed remainder
            // is kept as one text token instead of being silently dropped.
            break;
        }
    }

    // if anything remains after the close of the last tag, grab it
    if ($offset < $len) {
        $tokens[] = array('type' => TOKENS_TYPE_TEXT,
                          'body' => substr($str, $offset));
    }

    return;

}

/**
 * Wrap every stand-alone occurrence of a word or phrase in an HTML fragment
 * in a hyperlink, leaving tags and any text already inside an <a> element
 * untouched.
 *
 * The HTML is split with _tokenize(); only text tokens outside a link are
 * searched.
 *
 * @param string $string        HTML to process.
 * @param string $word          Literal word or phrase to link (not a regex).
 *                              An empty value returns $string unchanged.
 * @param string $link          Target URL. When empty, the English Wikipedia
 *                              article named after $word is used.
 * @param bool   $casesensitive TRUE to match $word case-sensitively; the
 *                              default is a case-insensitive match.
 * @return string The HTML with each match wrapped in <a href="...">...</a>.
 */
function linkity ($string, $word, $link='', $casesensitive=FALSE)
{
	// An empty needle would match at every position; there is nothing to link.
	if ((string)$word === '')
		return $string;

	if ($link=='')
		$link = 'http://en.wikipedia.org/wiki/'
			.rawurlencode(str_replace(' ', '_', ucfirst($word)));

	// Escape the URL for the attribute context. double_encode is off so a
	// caller that already passes "&amp;" does not end up with "&amp;amp;".
	$open = '<a href="'.htmlspecialchars($link, ENT_QUOTES, 'UTF-8', false).'">';

	// $word is quoted so it is matched literally. The lookarounds mean "not
	// adjacent to a word character": the same as \b for ordinary words, but
	// they also work when $word starts or ends with punctuation (e.g. "C++").
	$regexp = '#(?<!\w)('.preg_quote($word, '#').')(?!\w)#'.($casesensitive?'':'i');
	$inlink = FALSE;
	$out = '';

	$tokens = array();
	_tokenize($string, $tokens);

	foreach ($tokens as $t)
	{
		$body = $t['body'];

		if ($t['type']==TOKENS_TYPE_TAG)
		{
			// Anchored so that only a real <a ...> / </a> toggles the state,
			// not <abbr>, <area>, ... or a comment that mentions "<a".
			if (preg_match('#^<a[\s/>]#i', $body))
				$inlink = TRUE;
			elseif (preg_match('#^</a\s*>#i', $body))
				$inlink = FALSE;
			$out .= $body;
			continue;
		}

		if ($inlink)
		{
			$out .= $body;
			continue;
		}

		// With DELIM_CAPTURE the result alternates plain text (even index)
		// and matched word (odd index). Building the output by concatenation
		// keeps "$1" or "\1" inside $link from being read as a backreference.
		$pieces = preg_split($regexp, $body, -1, PREG_SPLIT_DELIM_CAPTURE);
		if ($pieces === FALSE)
		{
			// Regex engine failure: keep the text rather than lose it.
			$out .= $body;
			continue;
		}

		foreach ($pieces as $i => $piece)
			$out .= ($i % 2) ? $open.$piece.'</a>' : $piece;
	}
	return $out;
}

// Demo input: "my car" is already inside a link, "new car" is free-standing
// text, and "carburettor" merely starts with the same letters.
$str = 'Let\'s go to <a href="my.htm">my car</a>. Tomorrow I\'ll have to buy
a new car because there is a problem with the carburettor.';

// Show the input first, then the same text after linking the word "car".
echo $str, "\n";
echo linkity($str, 'car'), "\n";

1?>