Here's a small function I wrote that uses DOMDocument to collect all the links on a page; hopefully it will be of use to others.
<?php
// Demo: fetch the links of a sample page and dump them for inspection in a browser.
$links = get_links("http://www.facebook.com");
// The URLs and link texts originate from a remote page, so they are untrusted:
// escape the dump before embedding it in HTML. ENT_SUBSTITUTE keeps the output
// from collapsing to an empty string if the page contains invalid UTF-8.
echo "<pre>" . htmlspecialchars(print_r($links, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</pre>";
exit;
 
/**
 * Collect the hyperlinks found in an HTML document.
 *
 * The document is loaded with DOMDocument::loadHTMLFile(), so $url may be
 * anything that method can open (a local path or a stream-wrapper URL).
 * Anchors whose trimmed href is empty or is just "#" are skipped.
 *
 * @param string $url Location of the HTML document to scan.
 *
 * @return array List of array('url' => ..., 'text' => ...) entries, one per
 *               retained <a> element, in document order. Empty when $url is
 *               empty or the document could not be loaded.
 */
function get_links($url) {
    // Nothing to load; avoids the error loadHTMLFile() raises for an empty filename.
    if ($url === null || $url === '') {
        return array();
    }

    $xml = new DOMDocument();

    // Real-world HTML is rarely well-formed. Collect libxml's parse complaints
    // internally instead of silencing every error with "@", and restore the
    // caller's libxml setting afterwards.
    // NOTE(review): a stream-level warning for an unreachable URL may still be
    // reported by PHP itself - confirm this is acceptable for callers.
    $previous = libxml_use_internal_errors(true);
    try {
        $loaded = $xml->loadHTMLFile($url);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    if ($loaded === false) {
        return array();
    }

    $links = array();
    foreach ($xml->getElementsByTagName('a') as $link) {
        // Trim before filtering so hrefs like "  " or " # " are rejected too.
        $href = trim($link->getAttribute('href'));
        if ($href === '' || $href === '#') {
            continue;
        }

        $links[] = array('url' => $href, 'text' => trim($link->nodeValue));
    }

    return $links;
}
    ?>
 
 
No comments:
Post a Comment