I have a set of XML documents that have a rather long list of tags and corresponding values that I am interested in. For simplicity, I decided to create a class that would let me get an object with all the relevant attributes parsed from the XML file.
Having little experience with OOP in Python, I guess I might have made mistakes or suboptimal choices in terms of class design.
Probably, my decision to have a separate utility class for XMLParser was a bad idea, but at least I couldn't come up with a better one.
To sum up, I would appreciate any type of criticism, advice or suggestions with regard to how the classes are designed, as well as code quality in general. Even though it seems that everything works as I expected, I feel like it's a good opportunity to learn new approaches, concepts and level-up my understanding of OOP in Python. Thanks in advance!
from xml.dom import minidom
class XMLDoc:
def __init__(self, path_to_xml):
self.tree = minidom.parse(path_to_xml)
self.parser = XMLParser(self.tree)
self.goods = self._make_goods_list()
self.language = self.parser._get('language')
self.service_type = self.parser._get('serviceType')
self.trader_declaration_number = self.parser._get('traderDeclarationNumber')
self.trader_reference = self.parser._get('traderReference')
self.clearance_location = self.parser._get('clearanceLocation')
self.declaration_time = self.parser._get('declarationTime')
self.declaration_type = self.parser._get('declarationType')
self.correction_code = self.parser._get('correctionCode')
self.customs_office_number = self.parser._get('customsOfficeNumber')
self.dispatch_country = self.parser._get('dispatchCountry')
self.transport_in_container = self.parser._get("transportInContainer")
self.previous_document = PreviousDocument(self.tree, self.parser)
self.consignor = Consignor(self.tree,self.parser)
self.consignee = Consignee(self.tree, self.parser)
self.importer = Importer(self.tree, self.parser)
self.transport_means = Transport(self.tree,self.parser)
self.business = BusinessData(self.tree, self.parser)
def __repr__(self):
return f"XMLDoc {self.trader_reference}"
def _make_goods_list(self):
goods_list = []
all_goods = self.parser.get_goods()
for g in all_goods:
fields_list = self.parser.filter_nested_items(g)
goods_item = GoodsItem(fields_list,self.parser)
goods_list.append(goods_item)
return goods_list
class Consignor:
def __init__(self, tree, parser):
self.tree = tree
self.parser = parser
self.name = self.parser._get("consignor", "name")
self.street = self.parser._get("consignor", "street")
self.postal_code = self.parser._get("consignor", "postalCode")
self.city = self.parser._get("consignor", "city")
self.country = self.parser._get("consignor", "country")
def __repr__(self):
return f"Consignor object: {self.name}"
class Importer:
def __init__(self, tree, parser):
self.tree = tree
self.parser = parser
self.name = self.parser._get("importer", "name")
self.street = self.parser._get("importer", "street")
self.postal_code = self.parser._get("importer", "postalCode")
self.city = self.parser._get("importer", "city")
self.country = self.parser._get("importer", "country")
self.trader_id = self.parser._get("importer", "traderIdentificationNumber")
self.reference = self.parser._get("importer", "importerReference")
def __repr__(self):
return f"Importer object: {self.name}"
class Consignee:
def __init__(self, tree, parser):
self.tree = tree
self.parser = parser
self.name = self.parser._get("consignee", "name")
self.street = self.parser._get("consignee", "street")
self.postal_code = self.parser._get("consignee",
"postalCode")
self.city = self.parser._get("consignee", "city")
self.country = self.parser._get("consignee", "country")
self.trader_id = self.parser._get("consignee",
"traderIdentificationNumber")
def __repr__(self):
return f"Consignee object: {self.name}"
class Transport:
def __init__(self, tree, parser):
self.tree = tree
self.parser = parser
self.mode = self.parser._get("transportMeans",
"transportMode")
self.transportation_type = self.parser._get("transportMeans",
"transportationType")
self.transportation_country = self.parser._get("transportMeans",
"transportationCountry")
self.transportation_number = self.parser._get("transportMeans",
"transportationNumber")
class GoodsItem:
def __init__(self, fields_list, parser):
self.fields_list = fields_list
self.parser = parser
self.trader_item_id = self._get("traderItemID")
self.description = self._get("description")
self.commodity_code = self._get("commodityCode")
self.gross_mass = self._get("grossMass")
self.net_mass = self._get("netMass")
self.permit_obligation = self._get("permitObligation")
self.non_customs_law_obligation = self._get("nonCustomsLawObligation")
self.origin_country = self._get("origin", "originCountry")
self.origin_preference = self._get("origin", "preference")
self.package_type = self._get("packaging", "packagingType")
self.package_quantity = self._get("packaging", "quantity")
self.package_reference_number = self._get("packaging",
"packagingReferenceNumber")
# self.document_type = self._get("producedDocument","documentType")
# self.document_reference_number = self._get("producedDocument","packagingReferenceNumber")
# self.document_issue_date = self._get("producedDocument","issueDate")
self.vat_value = self._get("valuation", "VATValue")
self.vat_code = self._get("valuation", "VATCode")
def _get_inner_field(self, external, field):
matching_tags = self.parser.search_dom_list(self.fields_list, external)
filtered_matching_tags = self.parser.filter_nested_items(matching_tags)
inner_tag = self.parser.search_dom_list(filtered_matching_tags, field)
return inner_tag
def _get(self, field_name, inner_field=None):
if not inner_field:
return self.parser.search_dom_list(self.fields_list,
field_name).firstChild.nodeValue
else:
return self._get_inner_field(field_name,
inner_field).firstChild.nodeValue
class BusinessData:
def __init__(self,tree,parser):
self.tree = tree
self.parser = parser
self.incoterms = self.parser._get("business", "incoterms")
self.customs_account = self.parser._get("business",
"customsAccount")
self.vat_account = self.parser._get("business", "VATAccount")
self.vat_number = self.parser._get("business", "VATNumber")
self.vat_suffix = self.parser._get("business", "VATSuffix")
self.invoice_currency_type = self.parser._get("business",
"invoiceCurrencyType")
class PreviousDocument:
def __init__(self, tree, parser):
self.tree = tree
self.parser = parser
self.doctype = self.parser._get("previousDocument",
"previousDocumentType")
self.reference = self.parser._get("previousDocument",
"previousDocumentReference")
class XMLParser:
def __init__(self, tree):
self.tree = tree
def get_nested_tag(self, parent_tag, element):
reference = [item.firstChild.nodeValue for item in
self.tree.getElementsByTagName(parent_tag)[0].childNodes
if item.nodeType != 3 and item.tagName == element]
try:
return reference[0]
except IndexError:
return None
def get_single_tag(self, element):
reference = self.tree.getElementsByTagName(element)[0].firstChild.nodeValue
return reference
def get_goods(self):
goods = self.tree.getElementsByTagName('goodsItem')
return goods
def search_dom_list(self, dom_list, name):
dom_list = self.filter_items(dom_list)
for item in dom_list:
if item.tagName == name:
return item
def filter_nested_items(self, goods_item):
values = [item for item in goods_item.childNodes if item.nodeType != 3]
return values
def filter_items(self, items):
return [item for item in items if item.nodeType != 3]
def _get(self, field_name, inner_field=None):
if not inner_field:
return self.get_single_tag(field_name)
else:
return self.get_nested_tag(field_name, inner_field)