I made a basic XML parser supposed to be work in many browsers by using most older functions. What it must do is return an AST (Abstract Syntax Tree) of a XML text in a string.
(it's not cool for HTML because it doesn't recognize names of elements like input, div, etc...)
The following string:
"<!DOCTYPE html><html><Item1 Attribute1="Value"/></html>"
Turns into the following representation array when used in first parameter of call XMLAST
:
[ { type:"List", name:"!DOCTYPE", attributes=[ { name:"html", value:true } ] }, { type:"List", name:"html" }, { type:"Item", name:"Item1", attributes:[ { name:"Attribute1", value:"Value" } ] }, { type:"End" } ]
Both interpreting styles. I removed regular expressions because I care about the support, then just made a loop to remove the unallowed characters from tags or attributes name.
XMLAST=function(text){ var tree, expect, textend, reserve; tree=[]; expect={}; reserve={}; reserve.path=-1; reserve.textf=""; reserve.allowed=[ "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z", "1","2","3","4","5","6","7","8","9","0","-","@","!","#",":", "A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z" ]; textend=function(){ if(expect.textend){ if(expect.tag){ if(expect.tagname){ reserve.length=reserve.textf.length; reserve.tagname=""; for(reserve.i=0;reserve.length>reserve.i;reserve.i++){ reserve.cchar=reserve.textf.charAt(reserve.i); for(reserve.b=0;67>reserve.b;reserve.b++){ if(reserve.cchar===reserve.allowed[reserve.b]){ reserve.tagname+=reserve.cchar; break } } } expect.tagname=false; if(reserve.curchar===">"){ tree.push({ type:"List", name:reserve.tagname, attributes:[] }); expect.tag=false; reserve.path++ }else if(reserve.curchar==="/"){ tree.push({ type:"Item", name:reserve.tagname, attributes:[] }); expect.tag=false; reserve.path++; reserve.closureindex=text.indexOf(">"); if(reserve.closureindex===-1){ text="" }else{ text=text.substring(reserve.closureindex) } expect.tag=false } }else if(expect.closuretagname){ expect.closuretagname=false; expect.tag=false; reserve.closureindex=text.indexOf(">"); if(reserve.closureindex!==-1){ text=text.substring(reserve.closureindex); reserve.parentitem={ type:null }; reserve.expectend=0; //Parent item (list) of current item search. for(reserve.i=reserve.path;reserve.i>-1;reserve.i--){ if(tree[reserve.i].type==="List"){ if(reserve.expectend===0){ reserve.parentitem=tree[reserve.i]; break }else{ reserve.expectend-- } }else if(tree[reserve.i].type==="End"){ reserve.expectend++ } } reserve.expectend=null; if(reserve.textf===reserve.parentitem.name){ tree.push({ type:"End" }); reserve.path++; expect.tag=false } reserve.parentitem=null } }else{ /* ** ATTRIBUTE DETECTED ** ** NAME FORWARD ** */ reserve.length=reserve.textf.length; reserve.attrname=""; for(reserve.i=0;reserve.length>reserve.i;reserve.i++){ reserve.cchar=reserve.textf.charAt(reserve.i); for(reserve.b=0;67>reserve.b;reserve.b++){ if(reserve.cchar===reserve.allowed[reserve.b]){ reserve.attrname+=reserve.cchar; break } } } reserve.attributes.push({ name:reserve.attrname, value:true }); if(reserve.curchar==="="){ /* * CHECK IF ASSIGN VALUE IN QUOTES * */ reserve.valueindex=text.charAt(1); if(reserve.valueindex==='"'||reserve.valueindex==="'"){ reserve.valuestart=text.substring(2); if((reserve.valuendindex=reserve.valuestart.indexOf(reserve.valueindex))===-1){ text=""; expect.tag=false }else{ reserve.attributes[reserve.attributes.length-1].value=reserve.valuestart.substring(0,reserve.valuendindex); text=text.substring(2); text=text.substring(reserve.valuendindex) } } }else if(reserve.curchar===">"||reserve.curchar==="/"){ tree.push({ type:(reserve.curchar==="/"?"Item":"List"), name:reserve.tagname, attributes:reserve.attributes }); reserve.path++; expect.tag=false; if(reserve.curchar==="/"){ reserve.closureindex=text.indexOf(">"); if(reserve.closureindex!==-1){ text=text.substring(reserve.closureindex); reserve.closureindex=null } } } } }else{//Add text as a tree part. tree.push({ type:"Text", value:reserve.textf }); reserve.path++ } }else if(expect.tag){ if(expect.tagname){ expect.tag=false }else if(reserve.curchar==="/"){ tree.push({ type:"Item", name:reserve.tagname, attributes:reserve.attributes }); reserve.path++; expect.tag=false; text=text.substring(1); reserve.closureindex=text.indexOf(">"); if(reserve.closureindex!==-1){ text=text.substring(reserve.closureindex) } }else if(reserve.curchar===">"){ tree.push({ type:"List", name:reserve.tagname, attributes:reserve.attributes }); expect.tag=false; reserve.path++ } } expect.textend=false }; for(;;){ reserve.curchar=text.charAt(0); if(reserve.curchar===""){//End of all. Ignore unfinished rest. textend(); break }else if(reserve.curchar==="<"){//Start element. textend(); expect.closuretagname=false; if(text.substring(1,4)==="!--"){//comment /* <!--* COMMENT *--> */ expect.tag=false; reserve.closureindex=text.indexOf("-->"); tree.push({ type:"Comment", value:( reserve.closureindex===-1?//Has closure? text.substring(4,text.length)//Takes to the end. ://Has closure! text.substring(4,reserve.closureindex)//Takes to the index of -->. ) }); reserve.path++; if(reserve.closureindex!==-1){//Is not it infinite? text=text.substring(reserve.closureindex+2) }else{ break//Stop because comment is infinite. } }else{ /* <* TAG START *... */ expect.tag=true; expect.tagname=true; //Expect for item end and name. reserve.attributes=[];//Reset attributes. reserve.tagname="";//Reset tag name. } }else if(expect.tag&&(reserve.curchar==="/"||reserve.curchar===">"||reserve.curchar==="="||reserve.curchar===" "||reserve.curchar===" ")){ if(reserve.curchar==="/"){ if(expect.tagname&&!expect.textend){ expect.closuretagname=true; expect.tagname=false; expect.textend=false }else if(expect.tag){//closure <.../*> textend() } }else if(reserve.curchar===">"||reserve.curchar==="="||reserve.curchar===" "||reserve.curchar===" "){ textend() } }else{ if(expect.textend){ reserve.textf+=reserve.curchar }else{ reserve.textf=reserve.curchar; expect.textend=true } } text=text.substring(1) }; expect=null; textend=null; text=null; reserve=null; return tree }
What can be improved?