
本文详细介绍了如何使用Java DOM解析器处理具有多层结构的XML文件,特别关注了getElementsByTagName方法的正确使用以避免全局搜索问题。教程将指导读者如何将不同XML节点中的关联数据进行整合,并通过自定义Java对象实现结构化的数据存储和分组输出,最终呈现一个完整且可读性强的解析方案。
在处理复杂的XML文件时,首先要清晰地理解其结构。本教程以一个包含员工列表(employee_list)、职位详情(position_details)和员工信息(employee_info)的多层XML为例。
<?xml version="1.0" encoding="UTF-8"?>
<employee>
<employee_list>
<employee ID="1">
<firstname>Andrei</firstname>
<lastname>Rus</lastname>
<age>23</age>
<position-skill ref="Java"/>
<detail-ref ref="AndreiR"/>
</employee>
<!-- ... 其他员工 ... -->
</employee_list>
<position_details>
<position ID="Java">
<role>Junior Developer</role>
<skill_name>Java</skill_name>
<experience>1</experience>
</position>
<!-- ... 其他职位 ... -->
</position_details>
<employee_info>
<detail ID="AndreiR">
<username>AndreiR</username>
<residence>Timisoara</residence>
<yearOfBirth>1999</yearOfBirth>
<phone>0</phone>
</detail>
<!-- ... 其他详情 ... -->
</employee_info>
</employee>
Java DOM(Document Object Model)解析器将整个XML文档加载到内存中,并将其表示为一棵节点树。这使得开发者可以通过遍历树结构来访问和操作XML数据。核心类包括DocumentBuilderFactory、DocumentBuilder和Document。
在使用DOM解析时,一个常见的陷阱是Document.getElementsByTagName()方法的全局搜索特性。它会在整个文档中查找所有匹配指定标签名的元素,而不管它们在DOM树中的具体位置。
立即学习“Java免费学习笔记(深入)”;
例如,如果直接使用 doc.getElementsByTagName("employee"),它不仅会找到 employee_list 下的 <employee> 元素,还会意外地匹配到根元素 <employee> 本身。由于根元素既没有 ID 属性,也没有 firstname、lastname 等直接子元素,后续尝试读取这些属性或子节点时可能会得到空值,进而导致错误或空指针异常。
// 初始尝试可能导致的问题代码片段
NodeList nList = doc.getElementsByTagName("employee"); // 可能会包含根元素<employee>
// ... 遍历nList时,第一个元素可能是根元素,导致后续getAttribute("ID")等操作失败
为了避免这个问题,我们需要更精确地限定搜索范围。
正确的做法是,首先定位到包含目标元素的父节点,然后在该父节点下进行局部搜索。
例如,要获取所有员工信息,应首先找到 <employee_list> 节点,然后在其内部查找所有的 <employee> 节点。
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.List;
/**
 * Tutorial program that parses a multi-section employee XML file with the DOM API and
 * joins the sections into one {@link Person} per employee.
 *
 * <p>The document is expected to contain three sections under the root element:
 * {@code <employee_list>}, {@code <position_details>} and {@code <employee_info>}. Each
 * {@code <employee>} points at a position and a detail record through the {@code ref}
 * attribute of its {@code <position-skill>} and {@code <detail-ref>} children.
 *
 * <p>Every lookup is scoped to its section element first, because
 * {@code Document.getElementsByTagName("employee")} would also match the root element,
 * which is itself named {@code employee}.
 */
public class XmlParserTutorial {

    /** Immutable holder for one {@code <position>} entry of {@code <position_details>}. */
    static class PositionDetails {
        private final String id;
        private final String role;
        private final String skillName;
        private final int experience;

        public PositionDetails(String id, String role, String skillName, int experience) {
            this.id = id;
            this.role = role;
            this.skillName = skillName;
            this.experience = experience;
        }

        public String getId() { return id; }
        public String getRole() { return role; }
        public String getSkillName() { return skillName; }
        public int getExperience() { return experience; }
    }

    /** Immutable holder for one {@code <detail>} entry of {@code <employee_info>}. */
    static class EmployeeInfo {
        private final String id;
        private final String username;
        private final String residence;
        private final int yearOfBirth;
        private final String phone;

        public EmployeeInfo(String id, String username, String residence, int yearOfBirth, String phone) {
            this.id = id;
            this.username = username;
            this.residence = residence;
            this.yearOfBirth = yearOfBirth;
            this.phone = phone;
        }

        public String getId() { return id; }
        public String getUsername() { return username; }
        public String getResidence() { return residence; }
        public int getYearOfBirth() { return yearOfBirth; }
        public String getPhone() { return phone; }
    }

    /**
     * Flattened view of one employee: the fields read from {@code <employee>} plus the
     * fields copied from the referenced position and detail records.
     *
     * <p>Fields are filled in directly by the parser. When a reference cannot be
     * resolved, the corresponding String fields stay {@code null} and the int fields
     * stay {@code 0}.
     */
    static class Person {
        String id;
        String firstName;
        String lastName;
        int age;
        String role;
        String skillName;
        int experience;
        String username;
        String residence;
        int yearOfBirth;
        String phone;

        public String getId() { return id; }
        public String getFirstName() { return firstName; }
        public String getLastName() { return lastName; }
        public int getAge() { return age; }
        public String getRole() { return role; }
        public String getSkillName() { return skillName; }
        public int getExperience() { return experience; }
        public String getUsername() { return username; }
        public String getResidence() { return residence; }
        public int getYearOfBirth() { return yearOfBirth; }
        public String getPhone() { return phone; }

        /** Renders the person as a multi-line block, one {@code name: value} pair per line. */
        @Override
        public String toString() {
            return "PersonId: " + id + "\n" +
                    " firstname: " + firstName + "\n" +
                    " lastname: " + lastName + "\n" +
                    " age: " + age + "\n" +
                    " role: " + role + "\n" +
                    " skill_name: " + skillName + "\n" +
                    " experience: " + experience + "\n" +
                    " username: " + username + "\n" +
                    " residence: " + residence + "\n" +
                    " yearOfBirth: " + yearOfBirth + "\n" +
                    " phone: " + phone + "\n";
        }
    }

    /**
     * Parses {@code employees.xml} from the working directory and prints one block per
     * employee to standard output.
     *
     * <p>Any failure (missing file, malformed XML, non-numeric value in a numeric
     * element) is reported by printing the stack trace; nothing is rethrown.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            File xmlDoc = new File("employees.xml"); // the input must be named employees.xml
            // NOTE(review): the factory is used with its default settings. If this file can
            // come from an untrusted source, disable DTDs / external entities first.
            DocumentBuilderFactory dbFact = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuild = dbFact.newDocumentBuilder();
            Document doc = dBuild.parse(xmlDoc);
            // Merges adjacent text nodes and drops empty ones. It does NOT remove
            // whitespace-only text nodes, which is why numeric values are trimmed below.
            doc.getDocumentElement().normalize();

            System.out.println("Root element: " + doc.getDocumentElement().getNodeName());
            System.out.println("-----------------------------------------------------------------------------");

            List<Person> people = parsePeople(doc);

            // The count is only reported when the document actually has an <employee_list>.
            if (firstElement(doc.getElementsByTagName("employee_list")) != null) {
                System.out.println("Total Employees found: " + people.size());
                System.out.println("-----------------------------------------------------");
            }

            System.out.println("\n=============================================================================================");
            System.out.println("Grouped Employee Data:");
            System.out.println("=============================================================================================");
            for (Person p : people) {
                System.out.println(p);
                System.out.println("--------------------------------------------------------------------------");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds one {@link Person} per {@code <employee>} found under the first
     * {@code <employee_list>} of the document, resolving its position and detail
     * references against the other two sections.
     *
     * <p>An employee whose {@code <position-skill>} or {@code <detail-ref>} child is
     * missing, or whose reference matches no record, is still returned; the fields that
     * would have come from the missing record keep their default values.
     *
     * @param doc parsed XML document
     * @return the employees in document order; empty when there is no {@code <employee_list>}
     * @throws NumberFormatException if a numeric element holds non-numeric text
     */
    static List<Person> parsePeople(Document doc) {
        Map<String, PositionDetails> positionsById = parsePositionDetails(doc);
        Map<String, EmployeeInfo> infoById = parseEmployeeInfo(doc);

        List<Person> people = new ArrayList<>();
        Element employeeList = firstElement(doc.getElementsByTagName("employee_list"));
        if (employeeList == null) {
            return people;
        }

        // Searching below <employee_list> keeps the root <employee> element out of the result.
        NodeList employees = employeeList.getElementsByTagName("employee");
        for (int i = 0; i < employees.getLength(); i++) {
            Node employeeNode = employees.item(i);
            if (employeeNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element employeeElement = (Element) employeeNode;

            Person person = new Person();
            person.id = employeeElement.getAttribute("ID");
            person.firstName = getElementTextContent(employeeElement, "firstname");
            person.lastName = getElementTextContent(employeeElement, "lastname");
            person.age = parseIntField(employeeElement, "age");

            PositionDetails pos = positionsById.get(refAttribute(employeeElement, "position-skill"));
            if (pos != null) {
                person.role = pos.getRole();
                person.skillName = pos.getSkillName();
                person.experience = pos.getExperience();
            }

            EmployeeInfo empInfo = infoById.get(refAttribute(employeeElement, "detail-ref"));
            if (empInfo != null) {
                person.username = empInfo.getUsername();
                person.residence = empInfo.getResidence();
                person.yearOfBirth = empInfo.getYearOfBirth();
                person.phone = empInfo.getPhone();
            }

            people.add(person);
        }
        return people;
    }

    /** Indexes every {@code <position>} under the first {@code <position_details>} by its {@code ID} attribute. */
    private static Map<String, PositionDetails> parsePositionDetails(Document doc) {
        Map<String, PositionDetails> positionsById = new HashMap<>();
        Element section = firstElement(doc.getElementsByTagName("position_details"));
        if (section == null) {
            return positionsById;
        }
        NodeList positions = section.getElementsByTagName("position");
        for (int i = 0; i < positions.getLength(); i++) {
            Node positionNode = positions.item(i);
            if (positionNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element positionElement = (Element) positionNode;
            String id = positionElement.getAttribute("ID");
            positionsById.put(id, new PositionDetails(
                    id,
                    getElementTextContent(positionElement, "role"),
                    getElementTextContent(positionElement, "skill_name"),
                    parseIntField(positionElement, "experience")));
        }
        return positionsById;
    }

    /** Indexes every {@code <detail>} under the first {@code <employee_info>} by its {@code ID} attribute. */
    private static Map<String, EmployeeInfo> parseEmployeeInfo(Document doc) {
        Map<String, EmployeeInfo> infoById = new HashMap<>();
        Element section = firstElement(doc.getElementsByTagName("employee_info"));
        if (section == null) {
            return infoById;
        }
        NodeList details = section.getElementsByTagName("detail");
        for (int i = 0; i < details.getLength(); i++) {
            Node detailNode = details.item(i);
            if (detailNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element detailElement = (Element) detailNode;
            String id = detailElement.getAttribute("ID");
            infoById.put(id, new EmployeeInfo(
                    id,
                    getElementTextContent(detailElement, "username"),
                    getElementTextContent(detailElement, "residence"),
                    parseIntField(detailElement, "yearOfBirth"),
                    getElementTextContent(detailElement, "phone")));
        }
        return infoById;
    }

    /** Returns the first node of the list as an {@link Element}, or {@code null} if the list is empty. */
    private static Element firstElement(NodeList nodes) {
        if (nodes == null || nodes.getLength() == 0) {
            return null;
        }
        Node first = nodes.item(0);
        return first.getNodeType() == Node.ELEMENT_NODE ? (Element) first : null;
    }

    /**
     * Reads the {@code ref} attribute of the first {@code tagName} element below the parent.
     *
     * @return the attribute value, or an empty string when the element or attribute is absent
     */
    private static String refAttribute(Element parentElement, String tagName) {
        Element refElement = firstElement(parentElement.getElementsByTagName(tagName));
        return refElement == null ? "" : refElement.getAttribute("ref");
    }

    /**
     * Reads the text of the first {@code tagName} element below the parent as an int.
     * Surrounding whitespace is ignored; a missing or blank element yields {@code 0},
     * matching the default an unresolved reference leaves in {@link Person}.
     *
     * @throws NumberFormatException if the text is present but not a valid integer
     */
    private static int parseIntField(Element parentElement, String tagName) {
        String text = getElementTextContent(parentElement, tagName).trim();
        if (text.isEmpty()) {
            return 0;
        }
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            NumberFormatException withContext = new NumberFormatException(
                    "Element <" + tagName + "> of <" + parentElement.getTagName()
                            + "> is not an integer: \"" + text + "\"");
            withContext.initCause(e);
            throw withContext;
        }
    }

    /**
     * Helper: returns the text content of the first element with the given tag name
     * found below the parent element.
     *
     * @param parentElement element to search below
     * @param tagName tag name to look for
     * @return the text content, or an empty string if no such element exists
     */
    private static String getElementTextContent(Element parentElement, String tagName) {
        NodeList nodeList = parentElement.getElementsByTagName(tagName);
        if (nodeList != null && nodeList.getLength() > 0) {
            return nodeList.item(0).getTextContent();
        }
        return "";
    }
}
// (article heading that was fused onto the closing brace in the source: "代码说明:")
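通过本教程,我们学习了如何使用Java DOM解析器处理具有复杂层级和数据关联的XML文件。关键在于:
1. 先定位目标元素所在的父节点(如 employee_list),再在该父节点上调用 getElementsByTagName,避免在整个文档范围内误匹配同名元素;
2. 先将被引用的数据(position_details、employee_info)按 ID 存入 Map,再通过员工节点上的 ref 属性进行关联查找;
3. 使用自定义的 Java 对象(如 Person)承载整合后的数据,并通过 toString 统一输出格式。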
掌握这些技巧将有助于您更高效、更健壮地处理各种XML解析任务。
以上就是Java DOM解析多层XML文件并实现数据关联与分组输出的详细内容,更多请关注php中文网其它相关文章!
每个人都需要一台速度更快、更稳定的 PC。随着时间的推移,垃圾文件、旧注册表数据和不必要的后台进程会占用资源并降低性能。幸运的是,许多工具可以让 Windows 保持平稳运行。
Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号