Here's an example of how to parse an HTML table into an array using the PEAR module XML_HTMLSax3. It supports the <tr>, <td> and <th> elements and the rowspan and colspan attributes.

It's worth noting that this code will raise a bunch of notices if you run it displaying all errors. This is pretty difficult to avoid, so if you don't like that, disable the display of notices.


<?php

/**
 * Example of how to parse an HTML table using PEAR XML_HTMLSax3.
 *
 * Copyright (C) 2007 Toby Inkster
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 3 of the 
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/>.
 *
 * @author Toby Inkster
 * @copyright Copyright (C) 2007 Toby Inkster
 * @license http://www.gnu.org/licenses/lgpl-3.0.html GNU Lesser General Public License
 */

/**
 * Parser class.
 *
 * Collects the text of every <td>/<th> cell of an HTML table into a
 * two-dimensional array indexed as [row][column]. The column index takes
 * into account positions already covered by the rowspan/colspan of
 * earlier cells, so a cell's index is its position in the table grid.
 *
 * You probably only need to directly access the "Go" method. The handler
 * methods are public only because the SAX parser calls them.
 *
 * Limitation: nested tables are not supported; a <tr> of an inner table
 * is counted as a further row of the same grid.
 */
class TableParser
{
	/** Index of the row being parsed; -1 until the first <tr> is seen. */
	private $currow = -1;
	/** Index of the cell being parsed; -1 while the row has no cell yet. */
	private $curcol = -1;
	/** TRUE between the opening and the closing tag of a cell. */
	private $incell = FALSE;

	/** Occupancy grid: $shape[row][col] is TRUE once a cell or span covers it. */
	private $shape = array();
	/** Collected text: $data[row][col] holds the text of the cell starting there. */
	private $data  = array();

	/**
	 * SAX callback for opening tags.
	 *
	 * <tr> starts a new row; <td>/<th> starts a new cell at the first free
	 * grid position of the current row and reserves every position covered
	 * by its rowspan/colspan. All other tags are ignored, so inline markup
	 * inside a cell does not disturb the cell co-ordinates.
	 *
	 * @param mixed  $parser The calling parser (unused).
	 * @param string $tag    Tag name, in any letter case.
	 * @param array  $attrs  Attribute name => value pairs, names in any case.
	 */
	public function openHandler ($parser, $tag, $attrs)
	{
		$tag = strtolower($tag);

		if ($tag=='tr')
		{
			$this->currow++;
			$this->curcol = -1;
			$this->incell = FALSE;
			// Register the row right away so a row without cells still
			// shows up (as an empty array) in the result.
			if (!isset($this->data[$this->currow]))
				$this->data[$this->currow] = array();
			return;
		}

		// Only cell tags may move the column pointer or claim grid cells.
		if ($tag!='td' && $tag!='th')
			return;

		// A cell that appears before any <tr> has no row to belong to.
		if ($this->currow < 0)
		{
			$this->incell = FALSE;
			return;
		}

		// Step past the previous cell, then past every position that is
		// already taken by a rowspan/colspan of an earlier cell.
		$this->curcol++;
		while (isset($this->shape[$this->currow][$this->curcol]))
			$this->curcol++;

		$rowspan = 1;
		$colspan = 1;
		if (is_array($attrs))
		{
			foreach ($attrs as $k=>$v)
			{
				$k = strtolower($k);
				// Zero, negative or non-numeric spans are treated as 1 so
				// that the cell always occupies at least its own position.
				if ($k=='rowspan')
					$rowspan = max(1, (int)$v);
				elseif ($k=='colspan')
					$colspan = max(1, (int)$v);
			}
		}

		// Reserve the whole rectangle covered by this cell.
		for ($i=0; $i<$rowspan; $i++)
			for ($j=0; $j<$colspan; $j++)
			{
				$x = $this->currow + $i;
				$y = $this->curcol + $j;
				if (isset($this->shape[$x][$y]))
					error_log('Overlap!');
				$this->shape[$x][$y] = TRUE;
			}

		// Start with an empty string so that empty cells are present in the
		// result and dataHandler never appends to an undefined offset.
		$this->data[$this->currow][$this->curcol] = '';
		$this->incell = TRUE;
	}

	/**
	 * SAX callback for closing tags.
	 *
	 * Leaving a cell, row or table stops text collection, so that the
	 * whitespace between cells is not appended to the preceding cell.
	 *
	 * @param mixed  $parser The calling parser (unused).
	 * @param string $tag    Tag name, in any letter case.
	 */
	public function closeHandler ($parser, $tag)
	{
		$tag = strtolower($tag);
		if ($tag=='td' || $tag=='th' || $tag=='tr' || $tag=='table')
			$this->incell = FALSE;
	}

	/**
	 * SAX callback for character data.
	 *
	 * Appends the text to the current cell; text outside of any cell is
	 * discarded.
	 *
	 * @param mixed  $parser The calling parser (unused).
	 * @param string $data   A chunk of character data.
	 */
	public function dataHandler ($parser, $data)
	{
		if ($this->incell)
			$this->data[$this->currow][$this->curcol] .= $data;
	}

	/**
	 * Returns the text collected so far.
	 *
	 * @return array Cell text indexed as [row][column]. Positions covered
	 *               only by another cell's rowspan/colspan have no entry.
	 */
	public function getData ()
	{
		return $this->data;
	}
	
	/**
	 * Parses a piece of HTML and returns the table cells found in it.
	 *
	 * Loads PEAR XML_HTMLSax3, wires a fresh TableParser up as its element
	 * and data handler, and runs the parser over the markup.
	 *
	 * @param string $table_html HTML markup containing the table.
	 * @return array Cell text indexed as [row][column], see getData().
	 */
	public static function Go ($table_html)
	{
		require_once 'XML/HTMLSax3.php';
		$sax  = new XML_HTMLSax3;
		$hdlr = new TableParser;
		$sax->set_object($hdlr);
		$sax->set_element_handler('openHandler', 'closeHandler');
		$sax->set_data_handler('dataHandler');
		$sax->parse($table_html);
		return $hdlr->getData();
	}
	
}

// Sample input: a four-row table that exercises both rowspan and colspan.
$table = '
<table>
  <tr>
    <td rowspan="2">Test table lalala</td>
    <td>123</td>
    <td>456</td>
  </tr>
  <tr>
    <td>789</td>
    <td>ABC</td>
  </tr>
  <tr>
    <td colspan="2" rowspan="2">123</td>
    <td>456</td>
  </tr>
  <tr>
    <td>789</td>
  </tr>
</table>
';

// Parse the sample and dump the resulting array.
print_r(TableParser::Go($table));

?>