Prendere i tag da una pagina web con PHP e preg_match()

Queste funzioni sono delle vere chicche per tutti i webmaster, perché aiutano in alcune operazioni ed è possibile sviluppare alcuni tool molto semplicemente…

Da dire che preg_match() ritornerà un solo risultato, mentre preg_match_all() si fermerà quando non ci saranno più occorrenze. Con queste funzioni è possibile creare uno spider che magari indicizzi i siti su un ipotetico motore di ricerca, oppure creare un bot che faccia indagini di mercato prendendo i prezzi da e-commerce in giro e li confronti… insomma, con un po' di immaginazione si fa molto.

<?php
 
/* Functions for retrieving web document tags */
/* written by artViper designstudio ©2007 all rights reserved */
/* this function list is listed under the GPL */
/* if you use this, please honor our work and name us on your page */
/* if you have further questions, enhancements or anything else */
/* then drop a line at admin@artviper.net */
/* most functions return the content of the requested tags in array[0] */
/* and the count in array[1] except those, where a special function to */
/* retrieve the count is given */
/* example usage :
$file = file_get_contents("http://www.artviper.com");
$x = (get_link_rel($file));
print_r($x);
*/
 
// retrieve doctype of document
/**
 * Retrieve the DOCTYPE declaration of an HTML document.
 *
 * The published listing had lost its pattern (it read '//is'), so the
 * expression is rebuilt here to match "<!DOCTYPE ...>" case-insensitively.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => the complete declaration, or null when the document
 *               has none; [1] => number of declarations found (0 or 1).
 */
function get_doctype($file){
    $res = array();
    if (preg_match('/<!DOCTYPE\b[^>]*>/is', $file, $patterns) === 1) {
        array_push($res, $patterns[0]);
        // preg_match() stops at the first hit, so the count is always 1 here.
        // (count() on the matched string would throw a TypeError on PHP 8.)
        array_push($res, 1);
    } else {
        array_push($res, null);
        array_push($res, 0);
    }
    return $res;
}
 
// retrieve page title
/**
 * Retrieve the <title> element of an HTML document.
 *
 * The published pattern had lost its opening tag and no longer compiled
 * (it began with a bare "?" quantifier); it is rebuilt here.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => the whole element including its tags (the full
 *               regex match, as in the original), or null when the page
 *               has no title; [1] => number of titles found (0 or 1).
 */
function get_doc_title($file){
    $res = array();
    // Lazy ".*?" with the "s" flag: stop at the first closing tag, but allow
    // the title text to span several lines.
    if (preg_match('/<title\b[^>]*>.*?<\/title\s*>/is', $file, $patterns) === 1) {
        array_push($res, $patterns[0]);
        array_push($res, 1);
    } else {
        array_push($res, null);
        array_push($res, 0);
    }
    return $res;
}
 
// retrieve keywords
/**
 * Retrieve the content of the <meta name="keywords"> tag.
 *
 * The published pattern had been reduced to '/()/i', so capture group 2
 * never existed. The lookup is rebuilt in two steps: find the meta tag,
 * then read its content attribute, so attribute order does not matter.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => the keywords string exactly as written in the
 *               content attribute, or null when the tag is missing;
 *               [1] => number of keyword tags found (0 or 1).
 */
function get_keywords($file){
    $res = array();
    $found = preg_match('/<meta\b[^>]*\bname\s*=\s*["\']keywords["\'][^>]*>/i', $file, $tag) === 1
        // \1 makes the closing quote match the opening one, so the other
        // quote character may appear inside the value.
        && preg_match('/\bcontent\s*=\s*(["\'])(.*?)\1/is', $tag[0], $content) === 1;

    if ($found) {
        array_push($res, $content[2]);
        array_push($res, 1);
    } else {
        array_push($res, null);
        array_push($res, 0);
    }
    return $res;
}
 
// get rel links in header of the site
/**
 * Collect rel/href attribute pairs (typically from <link> tags in the head).
 *
 * Only tags that list rel before href are recognised, as in the original.
 * The original greedy ".*" groups swallowed neighbouring attributes on the
 * same line; each capture is now limited to one quoted value, and other
 * attributes may sit between rel and href inside the same tag.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => the full preg_match_all() result in pattern order:
 *                      [0] complete matches, [1] the literal "rel=",
 *                      [2] quoted rel values, [3] quoted href values;
 *               [1] => number of pairs found.
 */
function get_link_rel($file){
    $found = preg_match_all('/(rel=)("[^"]*")[^>]*?\shref=("[^"]*")/i', $file, $patterns);
    if ($found === false) {
        // Regex engine failure: hand back the same shape as "no matches".
        $patterns = array(array(), array(), array(), array());
    }
    $res = array();
    array_push($res, $patterns);
    array_push($res, count($patterns[2]));
    return $res;
}
 
/**
 * Collect the URLs of external stylesheets referenced through href="....css".
 *
 * Two fixes over the original pattern: the URL can no longer run past its
 * closing quote (several links on one line were merged into one match),
 * and it no longer has to start with a word character, so "/css/a.css"
 * and "../a.css" are found too.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of stylesheet URLs; [1] => how many were found.
 */
function get_external_css($file){
    $found = preg_match_all('/(href=")([^"]*\.css)"/i', $file, $patterns);
    // Group 2 is the URL itself; fall back to an empty list on engine failure.
    $urls = ($found === false) ? array() : $patterns[2];
    $res = array();
    array_push($res, $urls);
    array_push($res, count($urls));
    return $res;
}
 
// retrieve all h1 tags
/**
 * Retrieve the contents of all <h1> elements.
 *
 * NOTE: the published listing for this and the following functions was
 * damaged (opening tags stripped from the patterns, typographic quotes
 * instead of string delimiters). The patterns are reconstructions; they
 * return the inner HTML of each element.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_h1($file){
    return _get_tag_contents($file, 'h1');
}

/**
 * Retrieve the contents of all <h2> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_h2($file){
    return _get_tag_contents($file, 'h2');
}

/**
 * Retrieve the contents of all <h3> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_h3($file){
    return _get_tag_contents($file, 'h3');
}

/**
 * Retrieve the contents of all <h4> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_h4($file){
    return _get_tag_contents($file, 'h4');
}

/**
 * Retrieve the contents of all <h5> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_h5($file){
    return _get_tag_contents($file, 'h5');
}

/**
 * Retrieve the contents of all <h6> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_h6($file){
    return _get_tag_contents($file, 'h6');
}

/**
 * Retrieve the contents of all <p> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_p($file){
    return _get_tag_contents($file, 'p');
}

/**
 * Retrieve the link texts (inner HTML) of all <a> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array List of inner HTML strings, one per anchor.
 */
function get_a_content($file){
    $res = _get_tag_contents($file, 'a');
    return $res[0];
}

/**
 * Retrieve every double-quoted href value in the document.
 *
 * As in the original pattern, this is not limited to <a> tags: any
 * href="..." attribute is returned.
 *
 * @param string $file Raw HTML source of the page.
 * @return array List of href values without the surrounding quotes.
 */
function get_a_href($file){
    if (!preg_match_all('/(href=")(.*?)(")/i', $file, $patterns)) {
        return array();
    }
    return $patterns[2];
}

/**
 * Count the <a> tags that carry a double-quoted href attribute.
 *
 * The original pattern ended in a greedy "(.*)" that consumed the rest of
 * the line, so several links on one line were counted once; each tag is
 * now matched individually.
 *
 * @param string $file Raw HTML source of the page.
 * @return int Number of anchors with an href.
 */
function get_a_href_count($file){
    $count = preg_match_all('/<a\b[^>]*?\shref="[^"]*"/i', $file, $patterns);
    return ($count === false) ? 0 : $count;
}

/**
 * Retrieve, for each link, whatever attributes follow the href attribute
 * inside the opening <a> tag (e.g. ' target="_blank"').
 *
 * @param string $file Raw HTML source of the page.
 * @return array One raw attribute string per link, including its leading
 *               whitespace; an empty string when nothing follows the href.
 */
function get_a_additionaltags($file){
    if (!preg_match_all('/<a\b[^>]*?\shref="[^"]*"([^>]*)>.*?<\/a\s*>/is', $file, $patterns)) {
        return array();
    }
    return $patterns[1];
}

/**
 * Retrieve the contents of all <span> elements.
 *
 * @param string $file Raw HTML source of the page.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function get_span($file){
    return _get_tag_contents($file, 'span');
}

/**
 * Shared worker for the per-tag extractors: collects the inner HTML of
 * every <$tag ...>...</$tag> element.
 *
 * @param string $file Raw HTML source of the page.
 * @param string $tag  Element name without angle brackets, e.g. 'h1'.
 * @return array [0] => list of inner HTML strings; [1] => their count.
 */
function _get_tag_contents($file, $tag){
    $name = preg_quote($tag, '/');
    // \b keeps "p" from matching <pre> and "a" from matching <abbr>;
    // the lazy ".*?" stops at the first closing tag, the "s" flag lets the
    // content span several lines.
    $pattern = '/<' . $name . '\b[^>]*>(.*?)<\/' . $name . '\s*>/is';
    if (!preg_match_all($pattern, $file, $patterns)) {
        return array(array(), 0);
    }
    return array($patterns[1], count($patterns[1]));
}

// NOTE: the published listing breaks off inside the next function, so its
// body cannot be recovered. The surviving fragment is kept for reference:
//   function get_script($file){     $h1count = preg_match_all('/(

GD Star Rating
loading...
GD Star Rating
loading...