xml.py (4357B)
### EXESOFT XML PARSER ###
# Copyright David Barker 2010
#
# Python XML parser
#
# Minimal, string-based parser for simple XML documents.  It understands
# processing instructions (<? ... ?>), comments, nested elements, attributes
# written as name="value", and self-closing elements.  CDATA sections,
# DOCTYPE declarations, entity decoding and '>' inside attribute values are
# not handled.

import re


class XMLNode:
    """One element of the parsed tree.

    Attributes:
        parent:   the enclosing XMLNode, or None for the root
        tag:      element name
        meta:     dict mapping attribute names to their (unquoted) values
        data:     full source text of the element, tags included
        inside:   source text between the opening and closing tags
        children: list of child XMLNode objects (filled by SearchNode)
        parsed:   True once SearchNode has processed this node
    """

    def __init__(self, parent, tag, meta, data, inside):
        self.tag = tag
        self.meta = meta
        self.data = data
        self.inside = inside
        self.parent = parent
        self.children = []
        self.parsed = False

    def __repr__(self):
        return "XMLNode(tag=%r, meta=%r, children=%d)" % (
            self.tag, self.meta, len(self.children))


class XMLParser:
    """Parse an XML string into a tree of XMLNode objects.

    Typical use: ``root = XMLParser(text).Parse()``.  After parsing,
    ``meta`` holds the attributes read from processing instructions and
    ``root`` holds the root node.
    """

    def __init__(self, data):
        self.data = data
        self.meta = {}
        self.root = None

    @staticmethod
    def _parse_attributes(raw):
        """Turn 'a = "1" b = "2"' into {'a': '1', 'b': '2'}."""
        attributes = {}

        # Split like so ('|' = split off):
        # thingy = "value|" |other = "whatever|" |third = "woo!"
        for piece in raw.split("\" "):
            # Partition on the first '=' only, so values may contain '='.
            name, separator, value = piece.partition("=")
            if not separator:
                # Not a name="value" pair (e.g. trailing whitespace); skip it
                continue
            attributes[name.strip()] = value.strip().strip("\"")

        return attributes

    @staticmethod
    def _find_close(text, tag, pos):
        """Return the index of the '</tag' matching an element opened before pos.

        Nested elements with the same name are counted so that the matching
        closing tag is found, and names that merely share a prefix with
        ``tag`` are ignored.  Returns -1 when there is no matching close.
        """
        pattern = re.compile(r"<(/?)%s(?=[\s/>])" % re.escape(tag))
        depth = 1

        for match in pattern.finditer(text, pos):
            if match.group(1):
                depth -= 1
                if depth == 0:
                    return match.start()
            else:
                tagend = text.find(">", match.end())
                # A self-closing '<tag ... />' opens nothing
                if tagend == -1 or text[tagend - 1] != "/":
                    depth += 1

        return -1

    def _read_element(self, text, start):
        """Parse the element whose '<' is at text[start].

        Returns (tag, meta, data, inside, end) where ``end`` is the index just
        past the element.  Raises ValueError when the element is malformed.
        """
        startclose = text.find(">", start)
        if startclose == -1:
            raise ValueError("unterminated tag at offset %d" % start)

        tagraw = text[(start + 1):startclose]
        selfclosing = tagraw.endswith("/")
        if selfclosing:
            tagraw = tagraw[:-1]
        tagraw = tagraw.strip()

        if not tagraw:
            raise ValueError("empty tag at offset %d" % start)
        if tagraw.startswith("/"):
            raise ValueError("unexpected closing tag <%s>" % tagraw)

        # The tag name is the first word only; the rest is attributes
        tag = tagraw.split(None, 1)[0]

        meta = {}
        if "=" in tagraw:
            meta = self.GetTagMeta(tagraw)

        if selfclosing:
            return tag, meta, text[start:(startclose + 1)], "", startclose + 1

        end = self._find_close(text, tag, startclose + 1)
        if end == -1:
            raise ValueError("missing closing tag for <%s>" % tag)
        endclose = text.find(">", end)
        if endclose == -1:
            raise ValueError("unterminated closing tag for <%s>" % tag)

        data = text[start:(endclose + 1)]
        inside = text[(startclose + 1):end]
        return tag, meta, data, inside, endclose + 1

    def ReadMeta(self):
        """Move every '<? ... ?>' block from self.data into self.meta.

        The contents of each block are parsed as name="value" pairs.  The
        instruction target is not stripped, so '<?xml version="1.0"?>' is
        stored under the key 'xml version'; this is kept for backward
        compatibility.

        Raises:
            ValueError: if a '<?' has no closing '?>'.
        """
        while True:
            index = self.data.find("<?")  # Start of tag
            if index == -1:
                break

            endindex = self.data.find("?>", index + 2)  # Tag end
            if endindex == -1:
                raise ValueError("unterminated '<?' block")

            # Get the contents of the angular brackets and store the pairs
            metaraw = self.data[(index + 2):endindex].strip()
            self.meta.update(self._parse_attributes(metaraw))

            # Remove this tag from the stored data
            self.data = self.data[:index] + self.data[(endindex + 2):]

    def GetTagMeta(self, tag):
        """Return the attributes of a raw tag ('name a="1" b="2"') as a dict.

        Everything up to and including the first space is treated as the tag
        name and skipped.
        """
        metastart = tag.find(" ") + 1
        return self._parse_attributes(tag[metastart:])

    def StripXML(self):
        """Remove comments, newlines and tabs from self.data.

        Raises:
            ValueError: if a '<!--' has no closing '-->'.
        """
        # Remove comments
        while True:
            index = self.data.find("<!--")
            if index == -1:
                break

            endindex = self.data.find("-->", index + 4)
            if endindex == -1:
                raise ValueError("unterminated comment")

            self.data = self.data[:index] + self.data[(endindex + 3):]

        # Remove whitespace
        self.data = self.data.replace("\n", "").replace("\t", "")

    def GetChildren(self, node):
        """Placeholder; does nothing.  Read node.children after parsing."""
        pass

    def GetRoot(self):
        """Locate the first element in self.data and store it as self.root.

        Raises:
            ValueError: if self.data contains no element or it is malformed.
        """
        rootstart = self.data.find("<")
        if rootstart == -1:
            raise ValueError("no root element found")

        tag, meta, data, inside, _ = self._read_element(self.data, rootstart)
        self.root = XMLNode(parent=None, tag=tag, meta=meta, data=data,
                            inside=inside)

    def SearchNode(self, node):
        """Fill node.children from node.inside, then recurse into each child.

        Raises:
            ValueError: if a child element is malformed.
        """
        node.parsed = True

        text = node.inside
        children = []

        # Walk the text by index instead of rebuilding it after every child
        start = text.find("<")
        while start != -1:
            tag, meta, data, inside, end = self._read_element(text, start)
            children.append(XMLNode(node, tag, meta, data, inside))
            start = text.find("<", end)

        node.children = children

        for child in node.children:
            self.SearchNode(child)

    def Parse(self):
        """Run the full pipeline on self.data and return the root XMLNode."""
        self.ReadMeta()
        self.StripXML()
        self.GetRoot()
        self.SearchNode(self.root)

        return self.root