c++ - Return node information during comparison -

I've had a bit of help to make this code. At the moment the code prints out the id numbers of the differences between the files, i.e. whether an entry in new compared to old has been added, removed, or stayed the same.

However, I want to return the information in the node when it appears in new.xml, not just the id (i.e. title, location, date).

My best guess from what I can find on Google is to use (with no idea how to implement it): xpath->getancestor

My current code:

#include <set> #include <string> #include <sstream> #include <iostream> #include <algorithm>  #include "include/pugixml.hpp"  #define con(m) std::cout << m << '\n' #define err(m) std::cerr << m << std::endl  using str_set = std::set<std::string>;  int main() {     pugi::xml_document doc;      str_set a;     doc.load_file("old.xml");      // fill set ids file     for(auto&& node: doc.child("site_entries").children("entry"))         a.emplace(node.child("id").text().as_string());      str_set b;     doc.load_file("new.xml");      // fill set b ids file b     for(auto&& node: doc.child("site_entries").children("entry"))         b.emplace(node.child("id").text().as_string());      // use <algorithms> library      str_set b_from_a;     std::set_difference(a.begin(), a.end(), b.begin(), b.end()         , std::inserter(b_from_a, b_from_a.begin()));      str_set a_from_b;     std::set_difference(b.begin(), b.end(), a.begin(), a.end()         , std::inserter(a_from_b, a_from_b.begin()));      str_set a_and_b;     std::set_intersection(a.begin(), a.end(), b.begin(), b.end()         , std::inserter(a_and_b, a_and_b.begin()));      for(auto&& v: a)         con("a       : " << v);      con("");      for(auto&& v: b)         con("b       : " << v);      con("");      for(auto&& v: b_from_a)         con("b_from_a: " << v);      con("");      for(auto&& v: a_from_b)         con("a_from_b: " << v);      con("");      for(auto&& v: a_and_b)         con("a_and_b : " << v);      con(""); }

This is the example XML:

<?xml version="1.0" encoding="iso-8859-1" ?>
<site_entries>
  <entry>
    <id><![CDATA[946757316]]></id>
    <url><![CDATA[http://www.site.co.uk/cgi-bin/tr.cgi?tid=752276]]></url>
    <content><![CDATA[specialized dolce sport 27 speed]]></content>
    <title><![CDATA[bike]]></title>
    <price><![CDATA[£600]]></price>
    <date><![CDATA[01-aug-13]]></date>
    <display_reference><![CDATA[214683-50142933_370647]]></display_reference>
    <location><![CDATA[city of london]]></location>
    <category><![CDATA[bike]]></category>
  </entry>
  <entry>
    <id><![CDATA[90007316]]></id>
    <url><![CDATA[http://www.site.co.uk/cgi-bin/tr.cgi?tid=70952276]]></url>
    <content><![CDATA[giant sport offroad bike]]></content>
    <title><![CDATA[bike]]></title>
    <price><![CDATA[£100]]></price>
    <date><![CDATA[11-aug-15]]></date>
    <display_reference><![CDATA[2146433-50142933_370647]]></display_reference>
    <location><![CDATA[city of london]]></location>
    <category><![CDATA[bike]]></category>
  </entry>
</site_entries>

I have hundreds of thousands of total results and tens of thousands of added entries, so I'm looking for an efficient way of achieving this. Any pointers in the right direction would be appreciated.

You can put the xml_node objects into a map - instead of std::set<std::string>, use std::map<std::string, pugi::xml_node>.

It's possible/likely that using unordered_map is faster in this case, though. Something like this:

#include "pugixml.hpp"

#include <cstring> // std::strcmp
#include <iostream>
#include <unordered_map>

// Hash and equality functor for NUL-terminated C strings, so that
// `const char*` keys are hashed and compared by content, not by address.
struct string_hasher
{
    // Hashes the characters of `str` up to (not including) the terminating NUL.
    unsigned int operator()(const char* str) const
    {
        // jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/jenkins_hash_function#one-at-a-time)
        unsigned int result = 0;

        while (*str)
        {
            result += static_cast<unsigned int>(*str++);
            result += result << 10;
            result ^= result >> 6;
        }

        result += result << 3;
        result ^= result >> 11;
        result += result << 15;

        return result;
    }

    // Returns true when both C strings have identical contents.
    bool operator()(const char* lhs, const char* rhs) const
    {
        return std::strcmp(lhs, rhs) == 0;
    }
};

// Maps an entry id to its node. The key pointers are the ones returned by
// child_value("id"); they are not copied, so each map is only usable while
// the document it was filled from is alive and unmodified.
typedef std::unordered_map<const char*, pugi::xml_node, string_hasher, string_hasher> xml_node_map;

// Loads a.xml and b.xml, then prints every <entry> whose id is only in a.xml
// ("removed:") and every <entry> whose id is only in b.xml ("added:").
// Returns 1 if either file fails to load.
int main()
{
    pugi::xml_document doca, docb;
    xml_node_map mapa, mapb;

    if (!doca.load_file("a.xml") || !docb.load_file("b.xml"))
        return 1;

    for (auto& node: doca.child("site_entries").children("entry"))
        mapa[node.child_value("id")] = node;

    for (auto& node: docb.child("site_entries").children("entry"))
        mapb[node.child_value("id")] = node;

    for (const auto& ea: mapa)
        if (mapb.count(ea.first) == 0)
        {
            // '\n' rather than std::endl: no flush per reported entry.
            std::cout << "removed:" << '\n';
            ea.second.print(std::cout);
        }

    for (const auto& eb: mapb)
        if (mapa.count(eb.first) == 0)
        {
            std::cout << "added:" << '\n';
            eb.second.print(std::cout);
        }
}

Notable differences of this approach:

unordered_map lets you reduce the complexity of the diff - it's O(n+m), not O(n log n + m log m)
A custom hasher for C strings avoids allocating unnecessary memory

Of course, you can simplify this by using std::unordered_map<std::string, pugi::xml_node> - it's slower, but shorter.

Search This Blog

Remember

c++ - Return node information during comparison -

Comments

Post a Comment

Popular posts from this blog

Java 8 + Maven Javadoc plugin: Error fetching URL -

css - SVG using textPath a symbol not rendering in Firefox -

php - Google Calendar Events -