In this post, we will see how to manage an XML file in Python using the “xml.etree.ElementTree” module.
The “xml.etree.ElementTree” module is part of Python’s standard library, offering a simple and effective way to work with XML data. This module supports parsing XML from strings or files, navigating and searching the XML tree, and modifying or creating new XML documents. It’s a go-to choice for basic XML file operations due to its availability in the standard Python installation.
For more complex XML processing needs, the ‘lxml’ library is a powerful alternative. It is a third-party library that provides extensive functionalities for working with XML and HTML. It’s known for its performance and comprehensive feature set, including full XPath support, XSLT transformation, and schema validation.
Let’s see some examples of how to manage an XML file with “xml.etree.ElementTree”:
First of all, we define an XML file called data.xml:
[DATA.XML]
<library>
<book id="1">
<title>Python Essentials</title>
<author>John Doe</author>
<year>2020</year>
</book>
<book id="2">
<title>Learning XML</title>
<author>Jane Smith</author>
<year>2019</year>
</book>
</library>
Then, we define a file called CoreXML.py, containing the CoreXML class used to manage the XML file:
[COREXML.PY]
import xml.etree.ElementTree as ET
class CoreXML:
    """Wrapper around an XML file loaded with ``xml.etree.ElementTree``.

    Instance attributes (all set in ``__init__``):
        file_name: the path that was passed in.
        file_xml:  the ``ElementTree`` returned by ``ET.parse``.
        root_xml:  the root element of that tree.
    """

    def __init__(self, file_name) -> None:
        """Parse *file_name* and keep the tree and its root on the instance.

        Raises:
            OSError: if the file cannot be opened.
            xml.etree.ElementTree.ParseError: if the file is not
                well-formed XML.
        """
        self.file_name = file_name
        # loading file xml
        self.file_xml = ET.parse(file_name)
        # in this case, the root of the xml file is <library>
        self.root_xml = self.file_xml.getroot()
Finally, we define the file main.py:
[MAIN.PY]
# Bring in the CoreXML class from the CoreXML module.
from CoreXML import CoreXML
# Parse data.xml; the parsed tree and its root are kept on the instance.
obj_core_xml = CoreXML('data.xml')
Now, we will add to the CoreXML class all the methods needed to manage the XML file.
READ:
def read_values(self):
    """Print one summary line for every <book> directly under the root.

    Each line shows the book's ``id`` attribute and the text of its
    <title>, <author> and <year> children. A missing attribute or child
    element is printed as ``None`` instead of raising; a child that is
    present but has no text is printed as an empty string.
    """
    for book in self.root_xml.findall('book'):
        # Named book_id so the builtin id() is not shadowed.
        book_id = book.get('id')
        # findtext() returns None for an absent child, where
        # find(...).text would raise AttributeError.
        title = book.findtext('title')
        author = book.findtext('author')
        year = book.findtext('year')
        print(f'ID: {book_id} - Title: {title}, Author: {author}, Year: {year}')
from CoreXML import CoreXML
# Parse data.xml into a CoreXML instance.
obj_core_xml = CoreXML('data.xml')
# Print one line per <book> element found under the root.
obj_core_xml.read_values()
UPDATE:
def update_value(self, id, year):
    """Set the <year> text of every book whose ``id`` attribute matches.

    Args:
        id: book identifier; compared as ``str(id)`` against the ``id``
            attribute of each <book>.
        year: new year; stored as ``str(year)``.

    A matching book without a <year> child gets one created. The file
    is rewritten once, and only when at least one book matched.
    """
    wanted_id = str(id)
    changed = False
    for book in self.root_xml.findall('book'):
        # select the book by id
        if book.get('id') != wanted_id:
            continue
        year_elem = book.find('year')
        if year_elem is None:
            # No <year> yet: add it instead of failing on None.text.
            year_elem = ET.SubElement(book, 'year')
        # update year with the value in input
        year_elem.text = str(year)
        changed = True
    if changed:
        # save the file with the value updated
        self.file_xml.write(self.file_name)
from CoreXML import CoreXML
# Parse data.xml into a CoreXML instance.
obj_core_xml = CoreXML('data.xml')
# Show the books before the change.
obj_core_xml.read_values()
# Set the year of the book with id 1 to 2030 and write the file back.
obj_core_xml.update_value(1, 2030)
# Show the books again so the new year is visible.
obj_core_xml.read_values()
INSERT:
def add_book(self, id, title, author, year):
    """Append a new <book> to the root element and save the file.

    Args:
        id: value for the ``id`` attribute (stored as ``str(id)``).
        title, author, year: text for the corresponding child elements.
            Non-string values are converted with ``str()``; ``None`` for
            title or author is kept as "no text".

    If the file cannot be written, the failure is printed and the new
    book is removed from the in-memory tree again, so memory and disk
    stay in agreement. No uniqueness check is done on ``id``.
    """
    new_book = ET.Element('book', id=str(id))
    for tag, value in (('title', title), ('author', author), ('year', str(year))):
        # ElementTree cannot serialize non-string text, so coerce here
        # rather than let write() fail part-way through.
        ET.SubElement(new_book, tag).text = value if value is None else str(value)
    self.root_xml.append(new_book)
    try:
        # save the file with the value updated
        self.file_xml.write(self.file_name)
    except OSError as e:
        # Roll back so the tree does not hold a book the file lacks.
        self.root_xml.remove(new_book)
        print(f"Failed to add book: {e}")
from CoreXML import CoreXML
# Parse data.xml into a CoreXML instance.
obj_core_xml = CoreXML('data.xml')
# Show the books before the insert.
obj_core_xml.read_values()
# Append a new <book id="3"> with the given title, author and year, then save.
obj_core_xml.add_book(3, "Learning C#", "Damiano Abballe", 2024)
# Show the books again; the new entry is listed last.
obj_core_xml.read_values()
DELETE:
def delete_book(self, id):
    """Remove every <book> whose ``id`` attribute matches, then save.

    Args:
        id: book identifier; compared as ``str(id)`` against the ``id``
            attribute of each <book> directly under the root.

    The file is rewritten once, and only when at least one book was
    removed; an unknown id leaves both the tree and the file untouched.
    """
    wanted_id = str(id)
    # select the books by id (findall returns a list, so removing
    # from the root afterwards is safe)
    doomed = [book for book in self.root_xml.findall('book')
              if book.get('id') == wanted_id]
    for book in doomed:
        # detach the matching book from the root element
        self.root_xml.remove(book)
    if doomed:
        # save the file without the removed books
        self.file_xml.write(self.file_name)
from CoreXML import CoreXML
# Parse data.xml into a CoreXML instance.
obj_core_xml = CoreXML('data.xml')
# Show the books before the delete.
obj_core_xml.read_values()
# Remove the book with id 3 and save the file.
# NOTE(review): presumably the book added by the INSERT example; confirm it was run first.
obj_core_xml.delete_book(3)
# Show the books again to confirm the entry is gone.
obj_core_xml.read_values()
[COREXML.PY]
import xml.etree.ElementTree as ET
class CoreXML:
    """Read, update, insert and delete <book> entries in an XML file.

    The file is parsed once in ``__init__``. Every mutating method
    changes the in-memory tree and then writes it back to ``file_name``.

    Instance attributes:
        file_name: the path that was passed in.
        file_xml:  the ``ElementTree`` returned by ``ET.parse``.
        root_xml:  the root element of that tree.
    """

    def __init__(self, file_name) -> None:
        """Parse *file_name* and keep the tree and its root on the instance.

        Raises:
            OSError: if the file cannot be opened.
            xml.etree.ElementTree.ParseError: if the file is not
                well-formed XML.
        """
        self.file_name = file_name
        # loading file xml
        self.file_xml = ET.parse(file_name)
        # in this case, the root of the xml file is <library>
        self.root_xml = self.file_xml.getroot()

    def read_values(self):
        """Print one summary line for every <book> directly under the root.

        A missing attribute or child element is printed as ``None``
        instead of raising; a child that is present but has no text is
        printed as an empty string.
        """
        for book in self.root_xml.findall('book'):
            # Named book_id so the builtin id() is not shadowed.
            book_id = book.get('id')
            # findtext() returns None for an absent child, where
            # find(...).text would raise AttributeError.
            title = book.findtext('title')
            author = book.findtext('author')
            year = book.findtext('year')
            print(f'ID: {book_id} - Title: {title}, Author: {author}, Year: {year}')

    def update_value(self, id, year):
        """Set the <year> text of every book whose ``id`` attribute matches.

        ``id`` is compared as ``str(id)``; ``year`` is stored as
        ``str(year)``. A matching book without a <year> child gets one
        created. The file is rewritten once, and only when at least one
        book matched.
        """
        wanted_id = str(id)
        changed = False
        for book in self.root_xml.findall('book'):
            # select the book by id
            if book.get('id') != wanted_id:
                continue
            year_elem = book.find('year')
            if year_elem is None:
                # No <year> yet: add it instead of failing on None.text.
                year_elem = ET.SubElement(book, 'year')
            # update year with the value in input
            year_elem.text = str(year)
            changed = True
        if changed:
            # save the file with the value updated
            self.file_xml.write(self.file_name)

    def add_book(self, id, title, author, year):
        """Append a new <book> to the root element and save the file.

        ``id`` becomes the ``id`` attribute (as ``str(id)``). Title,
        author and year become child elements; non-string values are
        converted with ``str()``, and ``None`` for title or author is
        kept as "no text".

        If the file cannot be written, the failure is printed and the
        new book is removed from the in-memory tree again. No uniqueness
        check is done on ``id``.
        """
        new_book = ET.Element('book', id=str(id))
        for tag, value in (('title', title), ('author', author), ('year', str(year))):
            # ElementTree cannot serialize non-string text, so coerce
            # here rather than let write() fail part-way through.
            ET.SubElement(new_book, tag).text = value if value is None else str(value)
        self.root_xml.append(new_book)
        try:
            # save the file with the value updated
            self.file_xml.write(self.file_name)
        except OSError as e:
            # Roll back so the tree does not hold a book the file lacks.
            self.root_xml.remove(new_book)
            print(f"Failed to add book: {e}")

    def delete_book(self, id):
        """Remove every <book> whose ``id`` attribute matches, then save.

        ``id`` is compared as ``str(id)``. The file is rewritten once,
        and only when at least one book was removed.
        """
        wanted_id = str(id)
        # select the books by id (findall returns a list, so removing
        # from the root afterwards is safe)
        doomed = [book for book in self.root_xml.findall('book')
                  if book.get('id') == wanted_id]
        for book in doomed:
            # detach the matching book from the root element
            self.root_xml.remove(book)
        if doomed:
            # save the file without the removed books
            self.file_xml.write(self.file_name)