Monthly Archives: Marzec 2017

Python – odczyt oraz modyfikacja dokumentów XML

2017-03-30

W dzisiejszym wpisie pokażę, w jaki sposób odczytywać oraz modyfikować dokumenty XML w języku Python.

Poniżej znajduje się przykładowy dokument XML zawierający informacje o książkach:

<?xml version='1.0' encoding='UTF-8'?>
<books>
  <book>
    <title>Krótka historia czasu</title>
    <author>Stephen Hawking</author>
    <publisher>Zysk i S-ka</publisher>
    <publication_date>2015</publication_date>
    <chapters>
      <chapter number="1">
        <title>Nasz obraz Wszechświata</title>
        <page>13</page>
      </chapter>
      <chapter number="2">
        <title>Czas i przestrzeń</title>
        <page>25</page>
      </chapter>
    </chapters>
  </book>
  <book>
    <title>Filozofia kosmologii</title>
    <author>Michał Heller</author>
    <publisher>Copernicus Center Press</publisher>
    <publication_date>2013</publication_date>
    <chapters>
      <chapter number="1">
        <title>Kosmologia przed Einsteinem</title>
        <page>13</page>
      </chapter>
      <chapter number="2">
        <title>Kosmologia 1917-1965</title>
        <page>39</page>
      </chapter>
    </chapters>
  </book>
</books>

A oto klasy, które reprezentują obiekty książki i rozdziału z powyższego pliku:

class Book:
    """Plain container for the data of a single book.

    Attributes:
        title: Title passed to the constructor.
        author: Initially ``None``; meant to be assigned by the caller.
        publisher: Initially ``None``; meant to be assigned by the caller.
        publicationDate: Initially ``None``; meant to be assigned by the caller.
        chapters: A new, empty list created for every instance.
    """

    def __init__(self, title):
        self.title = title
        # Optional fields start out unset; chained targets are bound left to right.
        self.author = self.publisher = self.publicationDate = None
        self.chapters = []

class Chapter:
    """Plain container for the data of a single chapter.

    Attributes:
        book: The object passed as the owning book.
        title: Chapter title passed to the constructor.
        page: Page value passed to the constructor (stored unchanged).
        number: Initially ``None``; meant to be assigned by the caller.
    """

    def __init__(self, book, title, page):
        self.book, self.title, self.page = book, title, page
        # The chapter number is not a constructor argument; it starts unset.
        self.number = None

Mając dokument XML oraz klasy odpowiednich obiektów, możemy przejść do operacji odczytu oraz modyfikacji dokumentu.

Odczyt danych z pliku XML:

from xml.etree.ElementTree import parse, Element, SubElement

def ReadBooks(file=r"D:\App\!Python\Test\books.xml"):
    """Read every ``<book>`` element of an XML file into ``Book`` objects.

    Args:
        file: Path of the XML document to parse. Defaults to the location
            that was previously hard-coded, so existing ``ReadBooks()``
            calls keep working.

    Returns:
        A list of ``Book`` objects in document order. Each book's
        ``chapters`` list holds ``Chapter`` objects sorted by page number.
        All values are the raw strings found in the document (or ``None``
        when an element/attribute is absent).

    Raises:
        TypeError: If a chapter has no ``<page>`` element (``int(None)``).
        ValueError: If a chapter's ``<page>`` text is not an integer.
    """
    books = []
    root = parse(file).getroot()
    for bookElement in root.iterfind("book"):
        book = Book(bookElement.findtext("title"))
        book.author = bookElement.findtext("author")
        book.publisher = bookElement.findtext("publisher")
        book.publicationDate = bookElement.findtext("publication_date")
        for chapterElement in bookElement.iterfind("chapters/chapter"):
            chapter = Chapter(
                book,
                chapterElement.findtext("title"),
                chapterElement.findtext("page"),
            )
            chapter.number = chapterElement.get("number")
            book.chapters.append(chapter)
        # Pages are stored as text, so compare them numerically ("9" < "13").
        book.chapters.sort(key=lambda c: int(c.page))
        books.append(book)
    return books

Dodanie elementu do pliku XML:

from xml.etree.ElementTree import parse, Element, SubElement

def AddBook(book, file=r"D:\App\!Python\Test\books.xml"):
    """Append *book* as a new ``<book>`` element to an XML file and save it.

    Args:
        book: Object exposing ``title``, ``author``, ``publisher``,
            ``publicationDate`` and ``chapters``; every chapter exposes
            ``number``, ``title`` and ``page``.
        file: Path of the XML document to update in place. Defaults to the
            location that was previously hard-coded, so existing
            ``AddBook(book)`` calls keep working.

    Values that are ``None`` produce an empty element; a chapter whose
    ``number`` is ``None`` gets no ``number`` attribute. Other values are
    converted with ``str()``.
    """
    def as_text(value):
        # ElementTree serializes only str (None means "no text"); an int
        # page or year would otherwise make doc.write() fail while the
        # target file is already open for writing.
        return None if value is None else str(value)

    doc = parse(file)
    root = doc.getroot()
    bookElement = Element("book")
    SubElement(bookElement, "title").text = as_text(book.title)
    SubElement(bookElement, "author").text = as_text(book.author)
    SubElement(bookElement, "publisher").text = as_text(book.publisher)
    SubElement(bookElement, "publication_date").text = as_text(book.publicationDate)
    chaptersElement = SubElement(bookElement, "chapters")
    for chapter in book.chapters:
        chapterElement = SubElement(chaptersElement, "chapter")
        # An attribute value of None cannot be serialized, so leave the
        # attribute out when the chapter has no number.
        if chapter.number is not None:
            chapterElement.set("number", str(chapter.number))
        SubElement(chapterElement, "title").text = as_text(chapter.title)
        SubElement(chapterElement, "page").text = as_text(chapter.page)
    root.append(bookElement)
    doc.write(file, encoding="UTF-8", xml_declaration=True)

Modyfikacja elementu w pliku XML:

from xml.etree.ElementTree import parse, Element, SubElement

def EditBook(oldBook, newBook, file=r"D:\App\!Python\Test\books.xml"):
    """Overwrite the data of one book in an XML file and save it.

    The first ``<book>`` whose ``<title>`` text equals ``oldBook.title`` is
    updated with the title, author, publisher and publication date taken
    from *newBook*. Chapters are left untouched.

    Args:
        oldBook: Object whose ``title`` identifies the book to change.
        newBook: Object exposing ``title``, ``author``, ``publisher`` and
            ``publicationDate`` with the new values. ``None`` empties the
            element; other values are converted with ``str()``.
        file: Path of the XML document to update in place. Defaults to the
            location that was previously hard-coded.

    Raises:
        IndexError: If no book with the given title exists.
        AttributeError: If the matched book lacks one of the child elements
            being updated.
    """
    def as_text(value):
        # ElementTree serializes only str (None means "no text").
        return None if value is None else str(value)

    doc = parse(file)
    root = doc.getroot()
    # Stop at the first match instead of building a list of all matches.
    bookElement = next(
        (b for b in root.iterfind("book") if b.findtext("title") == oldBook.title),
        None,
    )
    if bookElement is None:
        # IndexError is kept so callers handling the old failure still work.
        raise IndexError("no book titled {!r} in {!r}".format(oldBook.title, file))
    bookElement.find("title").text = as_text(newBook.title)
    bookElement.find("author").text = as_text(newBook.author)
    bookElement.find("publisher").text = as_text(newBook.publisher)
    bookElement.find("publication_date").text = as_text(newBook.publicationDate)
    doc.write(file, encoding="UTF-8", xml_declaration=True)

def EditChapter(oldChapter, newChapter, file=r"D:\App\!Python\Test\books.xml"):
    """Overwrite the data of one chapter in an XML file and save it.

    The book is located by ``oldChapter.book.title`` and, inside it, the
    first ``<chapter>`` whose ``<title>`` text equals ``oldChapter.title``.
    Its ``number`` attribute, title and page are replaced with the values
    from *newChapter*.

    Args:
        oldChapter: Object exposing ``book.title`` and ``title``; identifies
            the chapter to change.
        newChapter: Object exposing ``number``, ``title`` and ``page`` with
            the new values. A ``None`` number removes the attribute, a
            ``None`` title/page empties the element; other values are
            converted with ``str()``.
        file: Path of the XML document to update in place. Defaults to the
            location that was previously hard-coded.

    Raises:
        IndexError: If the book or the chapter cannot be found.
        AttributeError: If the matched chapter lacks a ``<title>`` or
            ``<page>`` child element.
    """
    def as_text(value):
        # ElementTree serializes only str (None means "no text").
        return None if value is None else str(value)

    doc = parse(file)
    root = doc.getroot()
    bookElement = next(
        (b for b in root.iterfind("book")
         if b.findtext("title") == oldChapter.book.title),
        None,
    )
    if bookElement is None:
        # IndexError is kept so callers handling the old failure still work.
        raise IndexError(
            "no book titled {!r} in {!r}".format(oldChapter.book.title, file)
        )
    chapterElement = next(
        (c for c in bookElement.iterfind("chapters/chapter")
         if c.findtext("title") == oldChapter.title),
        None,
    )
    if chapterElement is None:
        raise IndexError(
            "no chapter titled {!r} in book {!r}".format(
                oldChapter.title, oldChapter.book.title
            )
        )
    if newChapter.number is None:
        # An attribute value of None cannot be serialized; drop it instead.
        chapterElement.attrib.pop("number", None)
    else:
        chapterElement.set("number", str(newChapter.number))
    chapterElement.find("title").text = as_text(newChapter.title)
    chapterElement.find("page").text = as_text(newChapter.page)
    doc.write(file, encoding="UTF-8", xml_declaration=True)

Usunięcie elementu z pliku XML:

from xml.etree.ElementTree import parse, Element, SubElement

def DeleteBook(book, file=r"D:\App\!Python\Test\books.xml"):
    """Remove one book from an XML file and save it.

    The first ``<book>`` whose ``<title>`` text equals ``book.title`` is
    removed from the document root.

    Args:
        book: Object whose ``title`` identifies the book to delete.
        file: Path of the XML document to update in place. Defaults to the
            location that was previously hard-coded, so existing
            ``DeleteBook(book)`` calls keep working.

    Raises:
        IndexError: If no book with the given title exists.
    """
    doc = parse(file)
    root = doc.getroot()
    # Stop at the first match instead of building a list of all matches.
    bookElement = next(
        (b for b in root.iterfind("book") if b.findtext("title") == book.title),
        None,
    )
    if bookElement is None:
        # IndexError is kept so callers handling the old failure still work.
        raise IndexError("no book titled {!r} in {!r}".format(book.title, file))
    root.remove(bookElement)
    doc.write(file, encoding="UTF-8", xml_declaration=True)