Payne's PHP Notes

Back to Payne's PHP Notebook.

12. Manipulate PubMed Data in XML Format (DOM)

External Links:

php.net : Document Object Model - Link.

Syntax / Example Result
<?php

// Build a PubMed XML file from an EndNote XML export.
//
// For each RECORD in references_endnote.xml that carries a PMID (stored in
// ACCESSION_NUMBER), fetch the article from NCBI E-utilities, tag the fetched
// PubmedArticle node with the EndNote REFNUM, and append it to a new
// <References> document, which is saved as references_pubmed_tmp.xml.

// Trial-run limit: stop after this many records that have a PMID.
$maxRecords = 10;

// Create a new DOMDocument object for the XML source.
$domSource = new DOMDocument();

// Load the XML file generated by EndNote. load() returns false on failure,
// in which case there is no root element to work with.
if ($domSource->load("references_endnote.xml") === false || $domSource->documentElement === null) {
    echo "Unable to load references_endnote.xml<br />";
    exit(1);
}

// Source root node.
$rootSource = $domSource->documentElement;

// Generate a node list containing all RECORD records.
$records = $rootSource->getElementsByTagName("RECORD");

// Display the number of records.
echo "Number of records: " . $records->length, "<hr />";

// Create a new DOMDocument object for the new XML file.
$domDocOutput = new DOMDocument();
$domDocOutput->loadXML("<References></References>");  // Root node

// Go through each record.
$i = 0;
foreach ($records as $record) {
    // A record may lack either element; fall back to an empty string
    // instead of dereferencing a null item(0).
    $refnums = $record->getElementsByTagName("REFNUM");
    $refnum = $refnums->length > 0 ? $refnums->item(0)->nodeValue : "";

    $pmids = $record->getElementsByTagName("ACCESSION_NUMBER");
    $pmid = $pmids->length > 0 ? trim($pmids->item(0)->nodeValue) : "";

    if (!$pmid) {  // PMID is empty.
        echo "Empty PMID<br />";
        continue;
    }

    // Only records with a PMID count towards the trial-run limit.
    $i++;
    if ($i > $maxRecords) {
        break;
    }

    // Display information.
    echo $i;
    echo ". REFNUM :" . $refnum . "; ";
    echo "PMID :" . $pmid . "
";

    // URL to the article. The PMID is URL-encoded so that unexpected
    // characters in the EndNote field cannot corrupt the query string.
    $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=";
    $url = $url . urlencode($pmid) . "&retmode=xml";

    // Create a new DOM object for the article.
    $domArticle = new DOMDocument();

    // Load information from NCBI; skip this record if the fetch or parse fails.
    if ($domArticle->load($url) === false) {
        echo "Unable to load PubMed record for PMID " . $pmid . "<br />";
        continue;
    }

    // PubmedArticle node; absent when NCBI has no article for this PMID.
    $pubmedArticle = $domArticle->getElementsByTagName("PubmedArticle")->item(0);
    if ($pubmedArticle === null) {
        echo "No PubmedArticle found for PMID " . $pmid . "<br />";
        continue;
    }

    // Create a deep copy of the PubmedArticle node owned by the output document.
    $articleCopy = $domDocOutput->importNode($pubmedArticle, true);

    // Create a RefNum node. The value goes in through a text node because
    // createElement() does not escape its value argument.
    $refnumNode = $domDocOutput->createElement("RefNum");
    $refnumNode->appendChild($domDocOutput->createTextNode($refnum));

    // Append the RefNum node to the PubmedArticle copy.
    $articleCopy->appendChild($refnumNode);

    // Append the article copy to the root node of the output XML file.
    $domDocOutput->documentElement->appendChild($articleCopy);
}

// Save XML file; save() returns false when the file cannot be written.
$domDocOutput->formatOutput = true;
if ($domDocOutput->save("references_pubmed_tmp.xml") === false) {
    echo "Unable to write references_pubmed_tmp.xml<br />";
}

?>
  • Open EndNote's XML file.
  • Retrieve REFNUM and ACCESSION_NUMBER (PMID).
  • Retrieve PubMed data in XML format from NCBI based on PMID.
  • This test only retrieves the first 10 records that have a PMID; records with an empty PMID are skipped and not counted.
  • 01.php
<?php

// Count the PubmedArticle elements in references_pubmed.xml and print the total.

// Create a DOM object for the XML file.
$domSource = new DOMDocument();

// load() returns false on failure, leaving no root element; stop here rather
// than calling a method on a null documentElement.
if ($domSource->load("references_pubmed.xml") === false || $domSource->documentElement === null) {
    echo "Unable to load references_pubmed.xml<br />";
    exit(1);
}

// All PubmedArticle elements below the root node.
$articles = $domSource->documentElement->getElementsByTagName("PubmedArticle");

// Display the number of articles
echo "Number of PubMed articles: " . $articles->length . "<br />";

?>
  • Display the number of PubMed articles.
  • 02.php