QuantCast Screen Scraper

This is a quick-and-dirty screen scraper for quantcast.com. It grabs a site's rank, description, and estimated US traffic.

Updated: 8/10/2010
Rank, Description and US Stats are now working again. Global Stats are no longer available.

Usage:

// Constructing the object downloads the Quantcast page for cnet.com.
$q = new QuantCast('cnet.com');

// Each getter parses its value out of the downloaded page on demand.
echo '<strong>Rank:</strong> ', $q->getRank(), '<br />';
echo '<strong>Desc:</strong> ', $q->getDescription(), '<br />';


Class Source Code:

/**
 * Quick-and-dirty screen scraper for a site's quantcast.com profile page.
 *
 * The constructor downloads the page once; each public getter then parses its
 * value out of the cached HTML. When the page could not be fetched, or the
 * expected markup is not present, the getters return the property defaults
 * (0 for rank, '' for description and US traffic) instead of raising notices
 * or returning unrelated page content.
 */
class QuantCast
{
    /** @var string Site name, lower-cased, without scheme or trailing slash. */
    public $siteName;

    /** @var int|string Rank as printed on the page (e.g. "1,234"); 0 until found. */
    public $rank = 0;

    /** @var string Site description with tags stripped; '' until found. */
    public $description = '';

    /** @var string Text of the US reach table cell; '' until found. */
    public $usTraffic = '';

    /** @var string Downloaded page with entities decoded and whitespace runs removed. */
    private $siteContents = '';

    /**
     * Download and pre-process the Quantcast page for a site.
     *
     * @param string $siteName Domain such as "cnet.com"; a leading http:// or
     *                         https:// and trailing slashes are ignored.
     */
    public function __construct($siteName)
    {
        $this->siteName = self::_normalizeSiteName($siteName);
        $html = $this->_fetch("http://www.quantcast.com/{$this->siteName}");

        // Collapse the markup onto one line. Note that every pair of spaces is
        // removed outright, so the parsers below match against squeezed text.
        $strip = array("\t", "\n", "\r", "\x20\x20", "\0", "\x0B");
        $this->siteContents = str_replace($strip, "", html_entity_decode($html));
    }

    /**
     * @return int|string The rank string from the page, or 0 when not found.
     */
    public function getRank()
    {
        $this->_getRank();
        return $this->rank;
    }

    /**
     * @return string The site description, or '' when not found.
     */
    public function getDescription()
    {
        $this->_getDescription();
        return $this->description;
    }

    /**
     * @return string The US reach figure text, or '' when not found.
     */
    public function getUsStats()
    {
        $this->_getUsStats();
        return $this->usTraffic;
    }

    /**
     * Lower-case the site name and drop any URL scheme and trailing slashes.
     *
     * @param string $siteName
     * @return string
     */
    private static function _normalizeSiteName($siteName)
    {
        $name = strtolower(trim((string) $siteName));
        $name = preg_replace('#^https?://#', '', $name);

        return rtrim((string) $name, '/');
    }

    /**
     * Fetch a URL with cURL using a browser user agent.
     *
     * @param string $url
     * @return string Response body, or '' when the request failed.
     */
    private function _fetch($url)
    {
        $ch = curl_init($url);
        if ($ch === false) {
            return '';
        }

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729)");
        $body = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on failure; normalise that to an empty page.
        return is_string($body) ? $body : '';
    }

    /**
     * Return the slice of the page from $open up to (not including) $close.
     *
     * @param string $open  Opening marker; included in the returned slice.
     * @param string $close Closing marker searched for after $open.
     * @return string|null The slice, or null when either marker is missing.
     */
    private function _between($open, $close)
    {
        $html = (string) $this->siteContents;

        $start = strpos($html, $open);
        if ($start === false) {
            return null;
        }

        $end = strpos($html, $close, $start);
        if ($end === false) {
            return null;
        }

        return substr($html, $start, $end - $start);
    }

    /**
     * Parse the rank; leaves $this->rank untouched when it cannot be found.
     */
    private function _getRank()
    {
        // Marker taken from the reader-reported fix for the changed page layout.
        $data = $this->_between('<li class="rank">', '</li>');
        if ($data === null) {
            return;
        }

        // Require at least one digit/comma so an empty <strong> is not a rank.
        if (preg_match('/<strong>([0-9,]+)<\/strong>/', $data, $match) === 1) {
            $this->rank = $match[1];
        }
    }

    /**
     * Parse the description; leaves $this->description untouched when missing.
     */
    private function _getDescription()
    {
        // Marker taken from the reader-reported fix for the changed page layout.
        $data = $this->_between('<div class="summary">', '</div>');
        if ($data !== null) {
            $this->description = strip_tags($data);
        }
    }

    /**
     * Parse the US reach cell; leaves $this->usTraffic untouched when missing.
     */
    private function _getUsStats()
    {
        $labels = explode('.', $this->siteName);
        if (count($labels) < 2) {
            // No dot in the name, so no cell id can be built.
            return;
        }

        // The cell id holds the domain labels in reverse order ("cnet.com" ->
        // "com.cnet"). Reversing every label is identical for two-label names;
        // for longer names this presumably matches Quantcast's ids -- confirm.
        $td = '<td id="reach-wd:' . implode('.', array_reverse($labels)) . '">';

        $data = $this->_between($td, '</td>');
        if ($data === null) {
            return;
        }

        // The label reads oddly because the constructor removed double spaces.
        $this->usTraffic = str_replace('Est.MonthlyUS People', '', strip_tags($data));
    }
}
    • anoop
    • June 8th, 2010

    Hi, nice script. The rank scrape is not working…

    • Ashwin Surajbali
    • June 8th, 2010

    anoop :
    Hi, nice script. The rank scrape is not working…

    Thanks, I’ll double check it. They might have changed their layout slightly.

    • Skeep
    • August 25th, 2010

    It looks like Quantcast may have changed their code a little. To get rank to work I changed line 41 to:
    $start = strpos($this->siteContents,”);

    I also had to change line 49 to:
    $start = strpos($this->siteContents, ”);

    That fixed the script for me.

    • Skeep
    • August 25th, 2010

    Sorry looks like HTML is being cleaned.

    It looks like Quantcast may have changed their code a little. To get rank to work I changed line 41 to:
    $start = strpos($this->siteContents, '<li class="rank">');

    I also had to change line 49 to:
    $start = strpos($this->siteContents, '<div class="summary">');

    That fixed the script for me.

  1. No trackbacks yet.