Here’s an example of how to parse an HTML table into an array using the PEAR module XML_HTMLSax3. It supports the rowspan and colspan attributes.
It’s worth noting that this code will raise a bunch of notices if you run it displaying all errors. This is pretty difficult to avoid, so if you don’t like that, disable the display of notices.
<?php
/**
* Parse an HTML table into a PHP array using XML_HTMLSax3.
*
* @author Toby Inkster
* @copyright Copyright (C) 2007 Toby Inkster
* @license http://www.gnu.org/licenses/gpl-3.0.html GNU General Public Licence
*/
/**
 * Parser class: SAX handler that turns one HTML table into a two-dimensional
 * array indexed as [row][column].
 *
 * You probably only need to directly access the "Go" method; the handler
 * methods are public only because the SAX parser has to call them.
 *
 * Cells that span several rows or columns occupy every grid position they
 * cover, so a later cell is placed in the first column that is still free.
 * The text of a spanning cell is stored at its top-left position only.
 */
class TableParser
{
    /** Zero-based index of the row being filled; -1 until the first <tr> is seen. */
    private $currow = -1;

    /** Zero-based index of the cell being filled; -1 until the first cell of the row. */
    private $curcol = -1;

    /** Occupancy grid: $shape[row][col] is TRUE once a cell covers that position. */
    private $shape = array();

    /** Collected text: $data[row][col] is the character data seen inside that cell. */
    private $data = array();

    /**
     * SAX callback for an opening tag.
     *
     * Only <tr>, <td> and <th> affect the current position. Any other tag
     * (including inline markup inside a cell) is ignored, so the text that
     * follows it is still appended to the cell that is currently open.
     *
     * @param mixed  $parser The SAX parser instance (unused).
     * @param string $tag    Tag name, in any letter case.
     * @param array  $attrs  Attribute name => value pairs, names in any letter case.
     * @return void
     */
    public function openHandler ($parser, $tag, $attrs)
    {
        $tag = strtolower($tag);

        if ($tag === 'tr') {
            // New row: the next cell will advance the column to 0.
            $this->currow++;
            $this->curcol = -1;
            return;
        }

        if ($tag !== 'td' && $tag !== 'th') {
            // Not a cell. Moving the column here would push the rest of the
            // current cell's text into a neighbouring column.
            return;
        }

        // Advance to the next column, then skip positions already claimed by
        // a rowspan/colspan of an earlier cell.
        $this->curcol++;
        while (!empty($this->shape[$this->currow][$this->curcol])) {
            $this->curcol++;
        }

        $rowspan = 1;
        $colspan = 1;
        foreach ($attrs as $name => $value) {
            $name = strtolower($name);
            if ($name === 'rowspan') {
                $rowspan = (int)$value;
            } elseif ($name === 'colspan') {
                $colspan = (int)$value;
            }
        }

        // A missing, zero, negative or non-numeric span still occupies the
        // cell's own position.
        $rowspan = max(1, $rowspan);
        $colspan = max(1, $colspan);

        for ($i = 0; $i < $rowspan; $i++) {
            for ($j = 0; $j < $colspan; $j++) {
                $x = $this->currow + $i;
                $y = $this->curcol + $j;
                if (!empty($this->shape[$x][$y])) {
                    // Two cells claim the same position (inconsistent spans
                    // in the markup). Report it and carry on.
                    error_log('Overlap!');
                }
                $this->shape[$x][$y] = TRUE;
            }
        }
    }

    /**
     * SAX callback for a closing tag. Nothing needs to happen here; it exists
     * because the parser is given a handler for closing tags.
     *
     * @param mixed  $parser The SAX parser instance (unused).
     * @param string $tag    Tag name.
     * @return void
     */
    public function closeHandler ($parser, $tag)
    {
    }

    /**
     * SAX callback for character data: appends the text to the current cell.
     *
     * Text seen before the first row, or before the first cell of a row, is
     * collected under index -1 and discarded by getData().
     *
     * @param mixed  $parser The SAX parser instance (unused).
     * @param string $data   A chunk of character data.
     * @return void
     */
    public function dataHandler ($parser, $data)
    {
        if (!isset($this->data[$this->currow][$this->curcol])) {
            // Initialise first so the append below does not read an undefined index.
            $this->data[$this->currow][$this->curcol] = '';
        }
        $this->data[$this->currow][$this->curcol] .= $data;
    }

    /**
     * Returns the collected cell text.
     *
     * @return array Cell text indexed as [row][column]; positions covered only
     *               by a span, and cells without any text, have no entry.
     */
    public function getData ()
    {
        // Index -1 holds text found outside any row or cell; drop it.
        unset($this->data[-1]);
        foreach ($this->data as $row => $cells) {
            unset($this->data[$row][-1]);
        }
        return $this->data;
    }

    /**
     * Parses the HTML of a table and returns its cells as an array.
     *
     * Requires the PEAR package XML_HTMLSax3 to be on the include path.
     *
     * @param string $table_html Markup of the table to parse.
     * @return array Cell text indexed as [row][column].
     */
    public static function Go ($table_html)
    {
        require_once 'XML/HTMLSax3.php';

        $sax = new XML_HTMLSax3();
        $hdlr = new TableParser();

        $sax->set_object($hdlr);
        $sax->set_element_handler('openHandler', 'closeHandler');
        $sax->set_data_handler('dataHandler');
        $sax->parse($table_html);

        return $hdlr->getData();
    }
}
// Demo: parse a small table that uses both rowspan and colspan, then dump the
// resulting [row][column] array.
// NOTE(review): the tags of the original sample were lost, so this markup is a
// reconstruction from the surviving cell text; the span attributes are a guess.
$table = '
<table>
<tr><td rowspan="2">Test table lalala</td><td>123</td><td>456</td></tr>
<tr><td>789</td><td>ABC</td></tr>
<tr><td colspan="2">123</td><td>456</td></tr>
<tr><td colspan="3">789</td></tr>
</table>
';
print_r(TableParser::Go($table));
?>