My Speediest Gatherers updated

·
Seeing Find Similar Users on del.icio.us this week, I noticed I never posted the Perl code for something similar I made a few years ago [explanation]. It didn't work any more anyway (the scraped HTML changed), so here's an updated version in PHP. It gives a higher score to users who bookmarked the same things as you earliest, rather than scoring by the number of bookmarks they share with you (combining the two might make sense, in the end).
<?php
// Rank del.icio.us users by how early they bookmarked the same URLs as
// $username, then write the ranking to gatherers.html as an HTML table.
$username = 'hublicious';  // EDIT THIS
$url = "http://del.icio.us/$username?setcount=100";

// Directory used by cache_get_contents(). Only create it when it is missing,
// so that re-running the script does not raise a "File exists" warning.
if (!is_dir('/tmp/delicious/'))
  mkdir('/tmp/delicious/', 0777, true);

$scores = array();

// fetch the user's HTML page
$html = loadHTMLFile($url);
if (!($html instanceof SimpleXMLElement))
  exit("Could not load $url\n");

// links to individual URL pages
$links = $html->xpath("//li[@class='post']/div[@class='meta']/a[@class='pop']");
if (!is_array($links))
  $links = array();

foreach ($links as $a){
  // kept in its own variable so the profile URL in $url is not overwritten
  $href = (string) $a['href'];
  print "$href\n";

  // fetch item HTML page. "?all" has to be concatenated onto the URL; passing
  // it as a second argument (as before) silently dropped it.
  // NOTE(review): assumes href is a site-relative path such as "/url/<hash>";
  // the leading slash is trimmed to avoid "//" in the request URL - confirm.
  $item_html = loadHTMLFile('http://del.icio.us/' . ltrim($href, '/') . '?all');
  if (!($item_html instanceof SimpleXMLElement))
    continue; // page could not be fetched or parsed: skip this bookmark

  $users = array();
  $items = $item_html->xpath("//ul[@class='historylist']/li/p/a[@class='who']");
  if (is_array($items))
    foreach ($items as $item)
      // each user that has bookmarked this item
      $users[] = (string) $item;

  // later bookmarks get lower score
  // (presumably the history list is newest-first, so reversing puts the
  // earliest bookmarker first - verify against the scraped page)
  $users = array_reverse($users);

  // The first user in the reversed list gets 1.0 and every following user
  // gets 1/$start less, so users in the later part of the list go negative.
  $count = $start = (count($users) + 1) / 2;
  foreach ($users as $user){
    if (!isset($scores[$user]))
      $scores[$user] = 0; // avoid an undefined-index notice on first sight
    $scores[$user] += $count-- / $start;
  }
}

arsort($scores);
print_r($scores);

// one table row per user with a positive total score
$rows = array();
foreach ($scores as $user => $score)
  if ($score > 0)
    $rows[] = sprintf(
      '<tr><td><a href="http://del.icio.us/%s">%s</a></td><td>%1.3f</td></tr>',
      urlencode($user),
      htmlspecialchars($user),
      $score
      );
$rows = implode("\n", $rows);

$output = <<<END
<html>
<head>
<title>$username</title>
</head>
<body>
<table>
<tr><th>Username</th><th>Score</th></tr>
$rows
</table>
</body>
</html>
END;
file_put_contents('gatherers.html', $output);
/**
 * Return the contents of $url, using an on-disk copy when one is recent enough.
 *
 * Cached copies live in /tmp/delicious/ under the md5 of the URL and are
 * reused for one week. After a real download the function pauses for one
 * second (presumably to avoid hammering the remote server).
 *
 * @param string $url URL (or any path file_get_contents() accepts) to read
 * @return string|false the contents, or false when the download failed
 */
function cache_get_contents($url){
  $dir = '/tmp/delicious/';
  $file = $dir . md5($url);
  $oldest_allowed = time() - 3600*24*7; // cache for 1 week

  if (file_exists($file) && filemtime($file) > $oldest_allowed){
    $data = file_get_contents($file);
    if ($data !== false)
      return $data;
    // unreadable cache entry: fall through and download again
  }

  $data = file_get_contents($url);
  if (!$data)
    return $data; // failed (false) or empty response: do not cache it

  // Create the cache directory on demand so the function does not depend on
  // the calling script having created it first.
  if (is_dir($dir) || mkdir($dir, 0777, true))
    file_put_contents($file, $data);
  sleep(1);

  return $data;
}
/**
 * Fetch an HTML page through cache_get_contents() and return it as a
 * SimpleXMLElement so callers can run XPath queries on it.
 *
 * @param string $url page to load
 * @return SimpleXMLElement|null the parsed document, or null when the page
 *                               could not be fetched or parsed
 */
function loadHTMLFile($url){
  $source = cache_get_contents($url);
  // A failed fetch yields false/empty; handing that to loadHTML() raises a
  // ValueError on PHP 8, which "@" cannot suppress.
  if (!is_string($source) || $source === '')
    return null;

  // Real-world HTML is rarely well-formed: collect libxml's parse warnings
  // internally instead of silencing everything with "@", then restore the
  // caller's previous setting.
  $previous = libxml_use_internal_errors(true);
  $html = new DOMDocument();
  $loaded = $html->loadHTML($source);
  libxml_clear_errors();
  libxml_use_internal_errors($previous);

  if (!$loaded || $html->documentElement === null)
    return null;
  return simplexml_import_dom($html);
}