Minimal PHP script for downloading PubMed XML (with error checking)

·
This PHP function provides the bare essentials for downloading PubMed XML for all articles matching a given query. It requires wget, which performs the download and shows its progress.

Use it like this:

<?php
// Example search expression; any PubMed query string can be passed instead.
$example_query = '"Nature"[TA] AND "genetics"[MeSH]';

// Runs the search and downloads the matching records as XML.
pubmed_fetch($example_query);
<?php
/**
 * Search PubMed for $query and download the XML of all matching articles with wget.
 *
 * Runs an ESearch with usehistory=y to obtain the result count and the
 * QueryKey/WebEnv history handle, then hands the corresponding EFetch URL to
 * wget, which writes "<translated query>-<YmdHis>.xml" in the current directory.
 *
 * Terminates the script (exit) when the search response cannot be loaded or
 * when the search matches nothing.
 *
 * @param string $query PubMed search expression, e.g. '"Nature"[TA] AND "genetics"[MeSH]'
 * @return void
 */
function pubmed_fetch($query){
  print "Searching for: $query\n";

  // retmax=1: only the count and the history handle are needed from this call
  $params = array(
    'db' => 'pubmed',
    'retmode' => 'xml',
    'retmax' => 1,
    'usehistory' => 'y',
    'term' => $query,
    );
  // NCBI requires HTTPS for E-utilities requests
  $url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' . http_build_query($params);
  $xml = simplexml_load_file($url);

  // simplexml_load_file() returns false on a network or parse failure;
  // report it instead of reading properties on a non-object
  if ($xml === false){
    file_put_contents('php://stderr', "Could not load search results from $url\n");
    exit(1);
  }

  pubmed_errors($xml);

  $count = (int) $xml->Count;
  if ($count === 0)
    exit();
  print "$count items found\n";

  $translated = (string) $xml->QueryTranslation;
  printf("Translated query: %s\n\n", $translated);

  // NOTE(review): EFetch documents a cap on records per request (10,000 at the
  // time of writing); larger result sets would need retstart paging - confirm.
  $params = array(
    'db' => 'pubmed',
    'retmode' => 'xml',
    'query_key' => (string) $xml->QueryKey,
    'WebEnv' => (string) $xml->WebEnv,
    'retmax' => $count,
    );
  $url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' . http_build_query($params);

  // Long queries translate into very long strings; cap the query part so the
  // complete file name stays within the usual 255-byte file-name limit
  $slug = substr(preg_replace('/\W/', '_', $translated), 0, 200);
  $file = sprintf('%s-%s.xml', $slug, date('YmdHis'));

  system(sprintf("wget --output-document=%s %s", escapeshellarg($file), escapeshellarg($url)), $status);
  if ($status !== 0)
    file_put_contents('php://stderr', "wget exited with status $status; $file may be missing or incomplete\n");
}
/**
 * Print the errors and warnings contained in an ESearch response, in red.
 *
 * Reports every PhraseNotFound / FieldNotFound entry of ErrorList and every
 * OutputMessage / QuotedPhraseNotFound / PhraseIgnored entry of WarningList,
 * one line each. Prints nothing at all (not even the colour codes) when there
 * is nothing to report or when $xml is not a SimpleXMLElement.
 *
 * @param SimpleXMLElement|false $xml parsed ESearch response, or false if loading failed
 * @return void
 */
function pubmed_errors($xml){
  // A failed load yields false rather than an element; there is nothing to inspect
  if (!($xml instanceof SimpleXMLElement))
    return;

  // section => (element name => line format), listed in output order
  $sections = array(
    'ErrorList' => array(
      'PhraseNotFound' => "Phrase not found: %s\n",
      'FieldNotFound' => "Field not found: %s\n",
    ),
    'WarningList' => array(
      'OutputMessage' => "%s\n",
      'QuotedPhraseNotFound' => "Quoted phrase not found: %s\n",
      'PhraseIgnored' => "Phrase ignored: %s\n",
    ),
  );

  $lines = array();
  foreach ($sections as $section => $formats){
    // Skip absent sections: reading a child of a missing element does not give an iterable
    if (!isset($xml->{$section}))
      continue;

    foreach ($formats as $element => $format){
      // Iterating (rather than casting to string) visits every element with
      // this name, not just the first one
      foreach ($xml->{$section}->{$element} as $node){
        $text = (string) $node;
        if ($text !== '')
          $lines[] = sprintf($format, $text);
      }
    }
  }

  if (!$lines)
    return;

  print "\033[31m"; // red
  print implode('', $lines);
  print "\033[00m"; // default
}