xml.py - PyIgnition

xml.py (4357B)
      1 ### EXESOFT XML PARSER ###
      2 # Copyright David Barker 2010
      3 #
      4 # Python XML parser
      5 
      6 
      7 
      8 class XMLNode:
      9     def __init__(self, parent, tag, meta, data, inside):
     10         self.tag = tag
     11         self.meta = meta
     12         self.data = data
     13         self.inside = inside
     14         self.parent = parent
     15         self.children = []
     16         self.parsed = False
     17 
     18 
     19 class XMLParser:
     20     def __init__(self, data):
     21         self.data = data
     22         self.meta = {}
     23         self.root = None
     24 
     25     def ReadMeta(self):
     26         while "<?" in self.data:
     27             index = self.data.find("<?")  # Start of tag
     28             startindex = index + 2  # Start of tag inside
     29             endindex = self.data.find("?>", index)  # Tag end
     30 
     31             # Get the contents of the angular brackets and split into separate meta tags
     32             metaraw = self.data[startindex:endindex].strip()
     33             separated = metaraw.split("\" ")  # Split like so ('|' = split off):
     34             # thingy = "value|" |other = "whatever|" |third = "woo!"
     35             
     36             for splitraw in separated:
     37                 split = splitraw.split("=")
     38 
     39                 # Add it to the dictionary of meta data
     40                 self.meta[split[0].strip()] = split[1].strip().strip('\"')
     41 
     42             # Remove this tag from the stored data
     43             before = self.data[:index]
     44             after = self.data[(endindex + 2):]
     45             self.data = "".join([before, after])
     46 
     47     def GetTagMeta(self, tag):
     48         meta = {}
     49         
     50         metastart = tag.find(" ") + 1
     51         metaraw = tag[metastart:]
     52         separated = metaraw.split("\" ")  # Split like so ('|' = split off):
     53         # thingy = "value|" |other = "whatever|" |third = "woo!"
     54 
     55         for splitraw in separated:
     56             split = splitraw.split("=")
     57 
     58             # Add it to the dictionary of meta data
     59             meta[split[0].strip()] = split[1].strip().strip('\"')
     60 
     61         return meta
     62 
     63     def StripXML(self):
     64         # Remove comments
     65         while "<!--" in self.data:
     66             index = self.data.find("<!--")
     67             endindex = self.data.find("-->", index)
     68             before = self.data[:index]
     69             after = self.data[(endindex + 3):]
     70             self.data = "".join([before, after])
     71 
     72         # Remove whitespace
     73         self.data = self.data.replace("\n", "").replace("\t", "")
     74 
     75     def GetChildren(self, node):
     76         pass
     77 
     78     def GetRoot(self):
     79         rootstart = self.data.find("<")
     80         rootstartclose = self.data.find(">", rootstart)
     81         roottagraw = self.data[(rootstart + 1):rootstartclose]
     82         
     83         rootmeta = {}
     84         if len(roottagraw.split("=")) > 1:
     85             rootmeta = self.GetTagMeta(roottagraw)
     86         
     87         roottag = roottagraw.strip()
     88         
     89         rootend = self.data.find("</%s" % roottag)
     90         rootendclose = self.data.find(">", rootend)
     91         rootdata = self.data[rootstart:(rootendclose + 1)].strip()
     92         rootinside = self.data[(rootstartclose + 1):rootend]
     93 
     94         self.root = XMLNode(parent = None, tag = roottag, meta = rootmeta, data = rootdata, inside = rootinside)
     95 
     96     def SearchNode(self, node):
     97         node.parsed = True
     98         
     99         tempdata = node.inside
    100         children = []
    101         
    102         while "<" in tempdata:
    103             start = tempdata.find("<")
    104             startclose = tempdata.find(">", start)
    105             tagraw = tempdata[(start + 1):startclose]
    106 
    107             meta = {}
    108             if "=" in tagraw:
    109                 meta = self.GetTagMeta(tagraw)
    110 
    111             tag = tagraw.split(" ")[0]
    112 
    113             end = tempdata.find("</%s" % tag)
    114             endclose = tempdata.find(">", end)
    115 
    116             data = tempdata[start:(endclose + 1)].strip()
    117             inside = tempdata[(startclose + 1):end]
    118 
    119             newnode = XMLNode(node, tag, meta, data, inside)
    120             children.append(newnode)
    121 
    122             before = tempdata[:start]
    123             after = tempdata[(endclose + 1):]
    124             tempdata = "".join([before, after])
    125 
    126         node.children = children
    127 
    128         for child in node.children:
    129             self.SearchNode(child)
    130 
    131     def Parse(self):
    132         self.ReadMeta()
    133         self.StripXML()
    134         self.GetRoot()
    135         self.SearchNode(self.root)
    136         
    137         return self.root
	PyIgnition https://github.com/animatinator/PyIgnition update for Python 3
	Clone: git clone https://git.frombelow.net/PyIgnition.git
	Log \| Files \| Refs \| README