DomParser.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. if (!dojo._hasResource["dojox.xml.DomParser"]) { // _hasResource checks added
  2. // by build. Do not use
  3. // _hasResource directly in
  4. // your code.
  5. dojo._hasResource["dojox.xml.DomParser"] = true;
  6. dojo.provide("dojox.xml.DomParser");
  7. dojox.xml.DomParser = new (function() {
  /***********************************************************************
   * The DomParser is a close-to (but not entirely) conforming XML parser
   * based on regular expressions. It will take any XML fragment and
   * return a lightweight JS structure that is similar to (but not
   * exactly) the DOM specification.
   *
   * Getter and setter methods are NOT available; the goal was to keep the
   * resulting object model entirely JS-like.
   *
   * All node types but document fragments are supported; all nodes
   * support getElementsByTagName and getElementsByTagNameNS (with short
   * names byName and byNameNS). The document node supports getElementById
   * (byId), and all nodes support a supplemental
   * childrenByName/childrenByNameNS method as well.
   *
   * The object model is intended to be a READONLY format; mutation events
   * are NOT supported, and though you can change properties on a
   * node-by-node basis, certain operations are not supported (such as
   * changing the ID of an element).
   **********************************************************************/
  // Internal use only: numeric codes stored in each node's nodeType
  // property by the parser and compared against by the lookup helpers.
  var nodeTypes = {
    ELEMENT                : 1,
    ATTRIBUTE              : 2,
    TEXT                   : 3,
    CDATA_SECTION          : 4,
    // 5 and 6 are intentionally unused here.
    PROCESSING_INSTRUCTION : 7,
    COMMENT                : 8,
    DOCUMENT               : 9
  };
  // All patterns are compiled once and shared by every parse() call.
  // They carry the "g" flag, so exec()/test() keep state in lastIndex.

  // Name under which the default (un-prefixed) namespace is recorded.
  var dNs = "_def_";

  // Whitespace handling: strip both ends / collapse internal runs.
  var trim = /^\s+|\s+$/g;
  var normalize = /\s+/g;

  // The five predefined XML entities.
  var eamp = /\&amp;/g;
  var elt = /\&lt;/g;
  var egt = /\&gt;/g;
  var equot = /\&quot;/g;
  var eapos = /\&apos;/g;

  // Structural patterns.
  // reTags: (1) tag name, (2) remainder of the tag, (3) text up to next "<".
  var reTags = /<([^>\/\s+]*)([^>]*)>([^<]*)/g;
  // reAttr: (1) attribute name, (2) double-quoted attribute value.
  var reAttr = /([^=]*)="([^"]*)"/g;
  // reEntity: (1) entity name, (2) replacement text.
  var reEntity = /<!ENTITY\s+([^"]*)\s+"([^"]*)">/g;
  // reCData / reComments: (1) section content, matched non-greedily.
  var reCData = /<!\[CDATA\[([\u0001-\uFFFF]*?)\]\]>/g;
  var reComments = /<!--([\u0001-\uFFFF]*?)-->/g;
  // Create a root (document) node.
  //
  // The returned object carries the document-level state filled in by
  // parse(): `namespaces` (prefix -> URI), `_nsPaths` (URI -> prefix),
  // `childNodes` and `documentElement`. It also owns a private id -> element
  // table that backs byId/getElementById.
  function _doc() {
    return new (function() {
      // Private lookup table: element id -> element node.
      var all = {};
      var hasOwn = Object.prototype.hasOwnProperty;

      this.nodeType = nodeTypes.DOCUMENT;
      this.nodeName = "#document";
      this.namespaces = {};
      this._nsPaths = {};
      this.childNodes = [];
      this.documentElement = null;

      // Register `obj` under its id; objects without an `id` property are
      // ignored.
      this._add = function(obj) {
        if (typeof(obj.id) != "undefined") {
          all[obj.id] = obj;
        }
      };

      // Forget whatever element is registered under `id`.
      this._remove = function(id) {
        if (all[id]) {
          delete all[id];
        }
      };

      // Return the element registered under `id`, or null when there is
      // none. (The table is `all`; the previous code read from an undeclared
      // `keys` variable and therefore threw on every call.) The own-property
      // check keeps ids such as "toString" from resolving to members
      // inherited from Object.prototype.
      this.byId = this.getElementById = function(id) {
        return hasOwn.call(all, id) ? all[id] : null;
      };

      this.byName = this.getElementsByTagName = byName;
      this.byNameNS = this.getElementsByTagNameNS = byNameNS;
      this.childrenByName = childrenByName;
    })();
  }
  // functions attached to element nodes

  // Return every descendant element of `this` (document order, depth
  // first) whose fully qualified nodeName (i.e. "svg:svg") equals `name`.
  // Passing "*" returns all descendant elements.
  function byName(name) {
    var matches = [];

    function walk(node) {
      dojo.forEach(node.childNodes, function(child) {
        // Only elements are candidates, and only elements are descended into.
        if (child.nodeType != nodeTypes.ELEMENT) {
          return;
        }
        if (name == "*" || child.nodeName == name) {
          matches.push(child);
        }
        walk(child);
      });
    }

    walk(this);
    return matches;
  }
  // Return every descendant element of `this` (document order, depth
  // first) whose localName equals `name` ("*" matches any name) and which
  // belongs to the namespace identified by the URI `ns`.
  //
  // Elements store their namespace *prefix* (or the default marker dNs) in
  // `namespace`, while ownerDocument._nsPaths maps namespace URI -> prefix.
  // When `ns` is omitted the default namespace is searched: the element's
  // prefix is compared with dNs directly. (Previously dNs itself was looked
  // up in the URI-keyed _nsPaths table, which yields undefined, so a call
  // without `ns` could never match anything.)
  function byNameNS(name, ns) {
    var useDefault = !ns;

    function walk(node, arr) {
      dojo.forEach(node.childNodes, function(c) {
        if (c.nodeType == nodeTypes.ELEMENT) {
          var prefix = useDefault ? dNs : c.ownerDocument._nsPaths[ns];
          if ((name == "*" || c.localName == name)
              && prefix == c.namespace) {
            arr.push(c);
          }
          walk(c, arr);
        }
      });
    }

    var a = [];
    walk(this, a);
    return a;
  }
  // Direct children only: return the element children of `this` whose
  // nodeName equals `name`, or every element child when `name` is "*".
  function childrenByName(name) {
    var found = [];
    dojo.forEach(this.childNodes, function(child) {
      if (child.nodeType != nodeTypes.ELEMENT) {
        return;
      }
      if (name == "*" || child.nodeName == name) {
        found.push(child);
      }
    });
    return found;
  }
  // attribute functions

  // Return the value of the first attribute of `this` whose qualified
  // nodeName equals `name`, or null when no such attribute exists.
  function getAttr(name) {
    var idx = 0;
    while (idx < this.attributes.length) {
      var attr = this.attributes[idx];
      if (attr.nodeName == name) {
        return attr.nodeValue;
      }
      idx++;
    }
    return null;
  }
  // Return the value of the first attribute of `this` whose localName is
  // `name` and whose namespace prefix is the one registered for the
  // namespace URI `ns` in ownerDocument._nsPaths; null when none matches.
  function getAttrNS(name, ns) {
    var idx = 0;
    while (idx < this.attributes.length) {
      var attr = this.attributes[idx];
      var prefix = this.ownerDocument._nsPaths[ns];
      if (prefix == attr.namespace && attr.localName == name) {
        return attr.nodeValue;
      }
      idx++;
    }
    return null;
  }
  // Set the value of the existing attribute whose qualified nodeName is
  // `name`. Attributes that do not exist are NOT created.
  //
  // note that you can only swap IDs using setAttribute, NOT with
  // setAttributeNS.
  //
  // When the "id" attribute is changed the element is re-indexed in its
  // owner document: the old id is removed, the element's `id` property is
  // updated, and the element is registered again. (Previously `this.id`
  // was left untouched, so _add() put the element straight back under its
  // OLD id and byId(newId) could never find it.)
  function setAttr(name, val) {
    var old = null;
    var found = false;
    for (var i = 0; i < this.attributes.length; i++) {
      if (this.attributes[i].nodeName == name) {
        old = this.attributes[i].nodeValue;
        this.attributes[i].nodeValue = val;
        found = true;
        break;
      }
    }
    if (name == "id") {
      if (found) {
        if (old != null) {
          this.ownerDocument._remove(old);
        }
        // keep the property the document index is keyed on in sync
        this.id = val;
      }
      this.ownerDocument._add(this);
    }
  }
  // Set the value of the first existing attribute of `this` whose localName
  // is `name` and whose namespace prefix is the one registered for the
  // namespace URI `ns` in ownerDocument._nsPaths. Does nothing when no
  // attribute matches; the document's id index is never touched here.
  function setAttrNS(name, val, ns) {
    var idx = 0;
    while (idx < this.attributes.length) {
      var attr = this.attributes[idx];
      var prefix = this.ownerDocument._nsPaths[ns];
      if (prefix == attr.namespace && attr.localName == name) {
        attr.nodeValue = val;
        return;
      }
      idx++;
    }
  }
  // navigation

  // Return the sibling immediately before `this` in its parent's
  // childNodes, or null when there is no parent or no earlier sibling.
  function prev() {
    var parent = this.parentNode;
    if (!parent) {
      return null;
    }
    // Index 0 has nothing before it, so the scan can start at 1.
    for (var idx = 1; idx < parent.childNodes.length; idx++) {
      if (parent.childNodes[idx] == this) {
        return parent.childNodes[idx - 1];
      }
    }
    return null;
  }
  // Return the sibling immediately after `this` in its parent's
  // childNodes, or null when there is no parent or no later sibling.
  function next() {
    var parent = this.parentNode;
    if (!parent) {
      return null;
    }
    // The last child has nothing after it, so stop one short of the end.
    for (var idx = 0; idx + 1 < parent.childNodes.length; idx++) {
      if (parent.childNodes[idx] == this) {
        return parent.childNodes[idx + 1];
      }
    }
    return null;
  }
  // Matches the "?" that terminates a processing instruction (the text
  // captured by reTags stops just before the closing ">").
  var rePiEnd = /\?$/;

  // Collapse whitespace runs and decode the five predefined XML entities.
  // "&amp;" is decoded last so that "&amp;lt;" yields the text "&lt;"
  // rather than "<".
  function _decodeText(text) {
    return text.replace(normalize, " ")
      .replace(egt, ">").replace(elt, "<")
      .replace(eapos, "'").replace(equot, '"')
      .replace(eamp, "&");
  }

  // Build a text node from raw (still entity-encoded) character data.
  function _createTextNode(text) {
    return {
      nodeType : nodeTypes.TEXT,
      nodeName : "#text",
      nodeValue : _decodeText(text)
    };
  }

  // the main method.
  //
  // Parse the XML string `str` and return a document node (see _doc()).
  // A null/undefined or empty `str` yields an empty document.
  //
  // Steps:
  //   1. custom <!ENTITY name "value"> declarations are expanded in place;
  //   2. CDATA sections and comments are swapped for numeric tokens so that
  //      "<" / ">" inside them cannot confuse the tag scanner;
  //   3. the tag scanner (reTags) walks the string, keeping `obj` pointed
  //      at the currently open node;
  //   4. documentElement is set to the first element child of the document.
  //
  // Character data is attached as a text node to whichever *element* is
  // open after the preceding tag has been processed; text at document level
  // is ignored.
  this.parse = function(/* String */str) {
    var root = _doc();
    if (str == null || str.length == 0) {
      return root;
    }
    var i;

    // preprocess custom entities
    if (str.indexOf("<!ENTITY") > 0) {
      var entity, eRe = [];
      reEntity.lastIndex = 0;
      while ((entity = reEntity.exec(str)) != null) {
        eRe.push({
          entity : "&" + entity[1].replace(trim, "") + ";",
          expression : entity[2]
        });
      }
      // replace instances in the document. split/join performs a literal
      // replacement: entity names may contain regex metacharacters (".")
      // and values may contain "$", both of which would be misread by
      // RegExp-based replacement.
      for (i = 0; i < eRe.length; i++) {
        str = str.split(eRe[i].entity).join(eRe[i].expression);
      }
    }

    // pre-parse for CData, and tokenize. Each section is replaced exactly
    // where it was matched; replacing by content could hit identical text
    // elsewhere in the document (or the start of the string when empty).
    var cdSections = [];
    str = str.replace(reCData, function(match, content) {
      cdSections.push(content);
      return "<![CDATA[" + (cdSections.length - 1) + "]]>";
    });

    // pre-parse for comments, and tokenize (same technique).
    var comments = [];
    str = str.replace(reComments, function(match, content) {
      comments.push(content);
      return "<!--" + (comments.length - 1) + "-->";
    });

    // parse the document
    var res, obj = root;
    // the shared regex is stateful; make sure the scan starts at 0.
    reTags.lastIndex = 0;
    while ((res = reTags.exec(str)) != null) {
      var tag = res[1];  // tag name (never contains "/" or whitespace)
      var rest = res[2]; // everything else up to the closing ">"

      if (tag.length == 0) {
        // closing tags: because group 1 cannot contain "/", "</name>"
        // arrives as an empty name with the slash leading group 2.
        // (Testing only group 2 mistook "<name/>" for a closing tag.)
        if (rest.charAt(0) != "/") {
          continue; // "<>" or similar junk: ignore
        }
        if (obj.parentNode) {
          obj = obj.parentNode;
        }
      } else if (tag.charAt(0) == "?") {
        // processing instruction: strip the terminating "?" (which sits
        // on the name when the PI has no data) and surrounding space.
        obj.childNodes.push({
          nodeType : nodeTypes.PROCESSING_INSTRUCTION,
          nodeName : tag.substr(1).replace(rePiEnd, ""),
          nodeValue : rest.replace(rePiEnd, "").replace(trim, "")
        });
      } else if (tag.charAt(0) == "!") {
        var token;
        if (tag.indexOf("![CDATA[") == 0) {
          // CDATA token: the digits index into cdSections.
          token = parseInt(tag.replace("![CDATA[", "").replace("]]", ""), 10);
          obj.childNodes.push({
            nodeType : nodeTypes.CDATA_SECTION,
            nodeName : "#cdata-section",
            nodeValue : cdSections[token]
          });
        } else if (tag.substr(0, 3) == "!--") {
          // Comment token: the digits index into comments.
          token = parseInt(tag.replace("!--", "").replace("--", ""), 10);
          obj.childNodes.push({
            nodeType : nodeTypes.COMMENT,
            nodeName : "#comment",
            nodeValue : comments[token]
          });
        } else {
          // skip over any declaration elements (and what trails them).
          continue;
        }
      } else {
        // Elements (with attributes)
        var o = {
          nodeType : nodeTypes.ELEMENT,
          nodeName : tag,
          localName : tag,
          namespace : dNs,
          ownerDocument : root,
          attributes : [],
          parentNode : null,
          childNodes : []
        };

        // check to see if it's namespaced.
        if (tag.indexOf(":") > -1) {
          var qname = tag.split(":");
          o.namespace = qname[0];
          o.localName = qname[1];
        }

        // set the function references.
        o.byName = o.getElementsByTagName = byName;
        o.byNameNS = o.getElementsByTagNameNS = byNameNS;
        o.childrenByName = childrenByName;
        o.getAttribute = getAttr;
        o.getAttributeNS = getAttrNS;
        o.setAttribute = setAttr;
        o.setAttributeNS = setAttrNS;
        o.previous = o.previousSibling = prev;
        o.next = o.nextSibling = next;

        // parse the attribute string.
        var attr;
        reAttr.lastIndex = 0;
        while ((attr = reAttr.exec(rest)) != null) {
          var attrName = attr[1].replace(trim, "");
          var attrValue = _decodeText(attr[2]);
          if (attrName == "xmlns") {
            // default namespace declaration
            root.namespaces[dNs] = attrValue;
            root._nsPaths[attrValue] = dNs;
          } else if (attrName.indexOf("xmlns:") == 0) {
            // prefixed namespace declaration
            var prefix = attrName.split(":")[1];
            root.namespaces[prefix] = attrValue;
            root._nsPaths[attrValue] = prefix;
          } else {
            var ln = attrName;
            var ns = dNs;
            if (attrName.indexOf(":") > 0) {
              var aq = attrName.split(":");
              ns = aq[0];
              ln = aq[1];
            }
            o.attributes.push({
              nodeType : nodeTypes.ATTRIBUTE,
              nodeName : attrName,
              localName : ln,
              namespace : ns,
              nodeValue : attrValue
            });
            // only add id as a property.
            if (ln == "id") {
              o.id = attrValue;
            }
          }
        }
        root._add(o);

        obj.childNodes.push(o);
        o.parentNode = obj;
        // if it's not a self-closing node, it becomes the open node.
        if (rest.charAt(rest.length - 1) != "/") {
          obj = o;
        }
      }

      // character data following the tag belongs to the node that is open
      // now: the new element after an open tag, the parent after a
      // closing or self-closing tag, comment, CDATA section or PI.
      var text = res[3].replace(trim, "");
      if (text.length > 0 && obj.nodeType == nodeTypes.ELEMENT) {
        obj.childNodes.push(_createTextNode(text));
      }
    }

    // set the document element
    for (i = 0; i < root.childNodes.length; i++) {
      var e = root.childNodes[i];
      if (e.nodeType == nodeTypes.ELEMENT) {
        root.documentElement = e;
        break;
      }
    }
    return root;
  };
  400. })();
  401. }