以前用过 Python 解析 XML 内容的两种方法(DOM 和 SAX),先把代码贴出来,具体含义等之后弄清楚了再补充上来。
xml 文件:
<?xml version="1.0" encoding="utf-8"?>
<collection>
  <Cycle1>
    <Number>628398</Number>
    <Signal>15168.389648 19429.083984 24276.886719 18786.134766</Signal>
    <Background>-739.025574 -691.423401 -794.166931 -1007.662659</Background>
  </Cycle1>
  <Cycle2>
    <Number>482765</Number>
    <Signal>10683.573242 14735.889648 19846.058594 13917.609375</Signal>
    <Background>-445.148132 -482.349854 -625.839417 -890.880981</Background>
  </Cycle2>
</collection>
使用DOM 解析xml:
# parseDOM.py -- collect the numeric fields of every <CycleN> element with
# the DOM API.  Written to run unchanged on both Python 2 and Python 3.
from xml.dom.minidom import parse
import xml.dom.minidom

# Maps each XML tag name inside a cycle to the key used in the result dict.
trans = {'Number': 'NUMBER', 'Signal': 'SIGNAL', 'Background': 'BACKGROUND'}


def _element_text(element):
    """Return the concatenated text/CDATA found directly under *element*."""
    text_types = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
    return "".join(
        node.data for node in element.childNodes if node.nodeType in text_types
    )


def collect_cycle_data(collection, cycles=(1, 2), fields=None):
    """Gather the whitespace-separated floats of each field, cycle by cycle.

    Args:
        collection: DOM element that contains the ``Cycle<N>`` elements.
        cycles: iterable of cycle numbers to look for; ``Cycle<N>`` elements
            that do not exist are skipped.
        fields: mapping of XML tag name -> result key; defaults to ``trans``.

    Returns:
        dict mapping each result key to a flat list of floats, with the
        values of the requested cycles appended in the order given.

    Raises:
        ValueError: if a field contains a token that is not a number.
    """
    fields = trans if fields is None else fields
    result = {}
    for cycle in cycles:
        cycle_nodes = collection.getElementsByTagName("Cycle%d" % cycle)
        if not cycle_nodes:
            continue
        for tag, key in fields.items():
            matches = cycle_nodes[0].getElementsByTagName(tag)
            if not matches:
                # A cycle lacking this field contributes nothing instead of
                # raising IndexError.
                continue
            # Join every text child: an element's text is not guaranteed to
            # be a single first child node (and may be absent altogether).
            tokens = _element_text(matches[0]).split()
            result.setdefault(key, []).extend(float(tok) for tok in tokens)
    return result


if __name__ == "__main__":
    DOMTree = xml.dom.minidom.parse("test.xml")
    collection = DOMTree.documentElement
    resultDict = collect_cycle_data(collection)
    for k in resultDict:
        print("%s %s" % (k, resultDict[k]))

# Sample run recorded in the original post (key order follows dict iteration
# order, so it can differ between interpreter versions):
#
#   tigerose@pc ~/github/parseXml $ python parseDOM.py
#   SIGNAL [15168.389648, 19429.083984, 24276.886719, 18786.134766, 10683.573242, 14735.889648, 19846.058594, 13917.609375]
#   NUMBER [628398.0, 482765.0]
#   BACKGROUND [-739.025574, -691.423401, -794.166931, -1007.662659, -445.148132, -482.349854, -625.839417, -890.880981]
使用 SAX解析xml
# parseSAX.py -- print the Number / Signal / Background text of every
# <CycleN> element with the SAX API.  Runs on both Python 2 and Python 3.
import xml.sax


class XmlHandler(xml.sax.ContentHandler):
    """SAX handler that prints the text of Number/Signal/Background elements.

    The text of the most recently closed element of each kind is kept in the
    ``Number``, ``Signal`` and ``Background`` attributes; ``CurrentData``
    holds the name of the element currently open ("" when none is tracked).
    """

    # Element name -> label printed in front of its text by endElement().
    _LABELS = {
        "Number": "NUMber:",
        "Signal": "SIGNAL:",
        "Background": "BACKGROUND:",
    }

    def __init__(self):
        # Explicit base call (not super()) so this also works where
        # ContentHandler is an old-style class (Python 2).
        xml.sax.ContentHandler.__init__(self)
        self.CurrentData = ""
        self.Number = ""
        self.Signal = ""
        self.Background = ""
        self._chunks = []  # text pieces received for the open element

    def startElement(self, tag, attributes):
        """Remember the opened tag and announce each ``Cycle*`` element."""
        self.CurrentData = tag
        # Fresh buffer, so an empty element never reports the previous
        # element's text.
        self._chunks = []
        if tag.startswith('Cycle'):
            print("*****%s*****" % tag)

    def endElement(self, tag):
        """Store and print the collected text of a tracked element."""
        label = self._LABELS.get(self.CurrentData)
        if label is not None:
            text = "".join(self._chunks)
            setattr(self, self.CurrentData, text)
            print("%s %s" % (label, text))
        self.CurrentData = ""
        self._chunks = []

    def characters(self, content):
        """Collect one chunk of character data for a tracked element.

        A SAX parser may deliver one text node in several calls, so chunks
        are accumulated rather than overwriting each other.
        """
        if self.CurrentData in self._LABELS:
            self._chunks.append(content)


if __name__ == "__main__":
    parser = xml.sax.make_parser()
    # Turn namespace processing off so startElement/endElement are used.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    Handler = XmlHandler()
    parser.setContentHandler(Handler)
    parser.parse("test.xml")

# Sample run recorded in the original post:
#
#   tigerose@pc ~/github/parseXml $ python parseSAX.py
#   *****Cycle1*****
#   NUMber: 628398
#   SIGNAL: 15168.389648 19429.083984 24276.886719 18786.134766
#   BACKGROUND: -739.025574 -691.423401 -794.166931 -1007.662659
#   *****Cycle2*****
#   NUMber: 482765
#   SIGNAL: 10683.573242 14735.889648 19846.058594 13917.609375
#   BACKGROUND: -445.148132 -482.349854 -625.839417 -890.880981