Code: Select all
// Download the Google News "world" page as an array of lines (file() keeps
// the line endings on each element).
$googlenews = file("http://news.google.com/news/en/us/world.html");
if ($googlenews === false) {
    // file() returns false when the page cannot be fetched. Fall back to an
    // empty list so count()-based loops over $googlenews are simply skipped
    // instead of failing on a non-array value.
    $googlenews = array();
}
Code: Select all
// Walk the downloaded page ($googlenews, one element per line) and collect one
// "source|time|headline-html" record per news item into $news_array.
$popularity = 0;       // last key written to $news_array; bumped before each insert, so keys start at 1
$news_array = array(); // start empty so consumers can iterate even when nothing matches

// Fragments removed from every chunk before it is inspected.
// Only the "&nbsp;" entity is stripped, not plain spaces: the marker
// "<font color=#6f6f6f>" and the "- " separator used below both contain a
// space and could never match once every space had been removed.
// NOTE(review): "&nbsp;" is presumed to be the originally intended literal - confirm.
// "\r" is stripped alongside "\n" so CRLF line endings are cleaned up as well.
$junk = array("</tr>", "</td>", "</table>", "</b>", "</font>", "<nobr>", "&nbsp;", "\r", "\n");

$line_count = count($googlenews);
for ($i = 46; $i < $line_count; $i++) { // real news start at line 46
    // Splitting on the font tag makes the per-item markers easy to find.
    $all = explode("<font size=", $googlenews[$i]);
    $chunk_count = count($all);

    for ($j = 0; $j < $chunk_count; $j++) {
        $act = str_replace($junk, "", $all[$j]); // cleaned copy of the current chunk

        // A real news item carries both the "-1>" size suffix and the grey font tag.
        if (stristr($act, "-1>") === false || stristr($act, "<font color=#6f6f6f>") === false) {
            continue;
        }

        // Drop the marker prefix and the closing </nobr>; what remains holds
        // the source and the time as one string.
        $where_time = str_replace("-1><font color=#6f6f6f><b>", "", $act);
        $where_time = str_replace("</nobr>", "", $where_time);

        // Split into source and time; the time part may be missing.
        $where_time_arr = explode("- ", $where_time);
        $where = $where_time_arr[0];
        $time = isset($where_time_arr[1]) ? $where_time_arr[1] : "";

        // The headline and link sit in the chunk just before the marker chunk.
        // The first chunk of a line has no predecessor, so the headline stays empty there.
        $true_news = "";
        if ($j > 0) {
            $news = explode('<td valign=top>', $all[$j - 1]);
            if (isset($news[1])) {
                $true_news = $news[1];
            }
        }

        // Store under the next free index so earlier records are not overwritten.
        $popularity++;
        $news_array[$popularity] = $where . '|' . $time . '|' . $true_news;
    }
}
Code: Select all
// Print every collected record. Each element of $news_array is a
// "source|time|headline" string; the headline part is echoed as-is.
// NOTE(review): the headline appears to be raw HTML taken from the fetched
// page, so it is deliberately not escaped here - confirm that is intended.
if (isset($news_array) && is_array($news_array)) { // nothing to print when no news was collected
    foreach ($news_array as $value) {
        // Limit the split to 3 parts so a "|" inside the headline stays in
        // the headline instead of cutting it short.
        $values_arr = explode("|", $value, 3);
        $where = $values_arr[0];
        $time = isset($values_arr[1]) ? $values_arr[1] : "";
        $news = isset($values_arr[2]) ? $values_arr[2] : "";
        echo "$news - found in $where $time
";
    }
}