Seeing Find Similar Users on del.icio.us this week, I noticed I never posted the Perl code for something similar I made a few years ago [explanation]. It didn't work any more anyway (the scraped HTML had changed), so here's an updated version in PHP. It scores other users by how early they bookmarked the same things as you, rather than by the number of bookmarks they share with you (combining the two might make sense, in the end).
<?php
// Ranks del.icio.us users by how early they bookmarked the same links as
// $username, and writes the ranking to gatherers.html as an HTML table.
//
// For every link on the user's page, the link's history page is fetched and
// each user listed there receives a score based on their position in the list.
$username = 'hublicious';
$url = "http://del.icio.us/$username?setcount=100";

// mkdir() raises a warning when the directory already exists, which is the
// case on every run after the first one.
if (!is_dir('/tmp/delicious/'))
    mkdir('/tmp/delicious/');

$scores = array();

$html = loadHTMLFile($url);
if (!($html instanceof SimpleXMLElement))
    exit("Could not load $url\n");

$links = $html->xpath("//li[@class='post']/div[@class='meta']/a[@class='pop']");
if (!is_array($links))
    $links = array();

foreach ($links as $a){
    // Kept in its own variable so the profile URL in $url is not overwritten.
    $href = (string) $a['href'];
    print "$href\n";

    // '?all' has to be concatenated onto the URL; passing it as a second
    // argument (as a comma would do) silently drops it.
    $item_html = loadHTMLFile('http://del.icio.us/' . $href . '?all');
    if (!($item_html instanceof SimpleXMLElement))
        continue; // skip links whose history page could not be loaded

    $users = array();
    $items = $item_html->xpath("//ul[@class='historylist']/li/p/a[@class='who']");
    if (!is_array($items))
        $items = array();
    foreach ($items as $item)
        $users[] = (string) $item;

    // Presumably the history page lists the newest bookmark first, so the
    // reversed list starts with the earliest bookmarker.
    $users = array_reverse($users);

    // The first user in the list gets 1.0; each following user gets 1/$start
    // less, so users past the middle of the list receive zero or negative.
    $count = $start = (count($users) + 1) / 2;
    foreach ($users as $user){
        // Initialise the total explicitly: += on a missing key raises a notice.
        if (!isset($scores[$user]))
            $scores[$user] = 0;
        $scores[$user] += $count-- / $start;
    }
}

arsort($scores); print_r($scores);

// Only users with a positive total make it into the report.
$rows = array();
foreach ($scores as $user => $score){
    if ($score > 0){
        $rows[] = sprintf(
            '<tr><td><a href="http://del.icio.us/%s">%s</a></td><td>%1.3f</td></tr>',
            urlencode($user),
            htmlspecialchars($user),
            $score
        );
    }
}
$rows = implode("\n", $rows);

$output = <<<END
<html>
<head>
<title>$username</title>
</head>
<body>
<table>
<tr><th>Username</th><th>Score</th></tr>
$rows
</table>
END;
file_put_contents('gatherers.html', $output);
/**
 * Return the contents of a URL, going through a one-week on-disk cache.
 *
 * Cached copies are stored in /tmp/delicious/ under the MD5 hash of the URL.
 * A copy modified within the last seven days is returned without fetching.
 * Otherwise the URL is fetched, the result is stored, and the function sleeps
 * for one second before returning, which spaces out consecutive fetches.
 *
 * @param string $url Address to read; anything file_get_contents() accepts.
 * @return string|false The contents, or false when the fetch failed.
 */
function cache_get_contents($url){
    $dir = '/tmp/delicious';
    $file = $dir . '/' . md5($url);
    $oldest = time() - 3600*24*7;

    if (file_exists($file) && filemtime($file) > $oldest){
        $cached = file_get_contents($file);
        // An unreadable cache entry falls through to a fresh fetch instead of
        // being reported to the caller as a failure.
        if ($cached !== false)
            return $cached;
    }

    $data = file_get_contents($url);
    // Failed or empty responses are handed back as-is and never cached, so a
    // transient error is not remembered for a week.
    if ($data === false || $data === '')
        return $data;

    // Create the cache directory on demand rather than relying on the caller
    // having created it beforehand.
    if (is_dir($dir) || mkdir($dir, 0777, true))
        file_put_contents($file, $data, LOCK_EX);

    sleep(1);
    return $data;
}
/**
 * Fetch a page through cache_get_contents() and parse it as HTML.
 *
 * @param string $url Address of the page.
 * @return SimpleXMLElement|null|false The parsed document as a SimpleXML
 *         element; null when the page could not be fetched or parsed, or
 *         whatever failure value simplexml_import_dom() returns.
 */
function loadHTMLFile($url){
    $markup = cache_get_contents($url);
    // loadHTML() rejects an empty source (with a ValueError on PHP 8), so a
    // failed or empty fetch is reported to the caller instead of being parsed.
    if ($markup === false || $markup === '')
        return null;

    // Collect libxml's complaints about sloppy real-world HTML internally
    // rather than silencing every error with @, then restore the old setting.
    $previous = libxml_use_internal_errors(true);
    $html = new DOMDocument();
    $parsed = $html->loadHTML($markup);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$parsed || $html->documentElement === null)
        return null;

    return simplexml_import_dom($html);
}