I'd like a way to parse an XML document, without knowing its structure or hardcoding its node/element names, and get back a Map of key/value pairs.
I'm currently parsing an XML document using StAX. The current implementation works, but interestingly enough it's not parsing the whole document; somehow it's skipping data.
By looking at the XML Document and the test output, you'll notice not all the values are printed. What could I be missing?
Code:
/**
 * Reads an XML file with StAX and returns its element names mapped to text, without
 * relying on any knowledge of the document's structure.
 *
 * <p>Each element's local name (namespace prefix/URI ignored) becomes a key. The value is
 * the text of the event that immediately follows the start tag: the element's content for
 * a simple leaf element, usually the indentation whitespace for a container element, or
 * {@code null} when the start tag is followed directly by another tag.
 *
 * <p>The map is keyed by element name only, so an element name that occurs more than once
 * in the document keeps just the value of its last occurrence; earlier occurrences are
 * overwritten. Attributes are not captured.
 *
 * @param file the XML file to parse
 * @return a map from element local name to the text following its start tag (may be null)
 * @throws Exception if the file cannot be opened or the XML cannot be parsed
 */
public Map<String, String> p(File file) throws Exception {
    Map<String, String> map = new HashMap<String,String>();
    FileInputStream in = new FileInputStream(file);
    try {
        XMLStreamReader xr = XMLInputFactory.newInstance().createXMLStreamReader(in);
        try {
            // Name of the element whose start tag was the previous event, if any.
            String pending = null;
            while (xr.hasNext()) {
                int e = xr.next();
                if (pending != null) {
                    // getText() is only legal on text-bearing events; check the event type
                    // instead of catching IllegalStateException.
                    boolean textual = e == XMLStreamReader.CHARACTERS
                            || e == XMLStreamReader.CDATA
                            || e == XMLStreamReader.SPACE
                            || e == XMLStreamReader.COMMENT
                            || e == XMLStreamReader.ENTITY_REFERENCE;
                    map.put(pending, textual ? xr.getText() : null);
                    pending = null;
                }
                // Examine the same event again: when a start tag is followed directly by a
                // child start tag, the child must still be recorded rather than skipped.
                if (e == XMLStreamReader.START_ELEMENT) {
                    pending = xr.getLocalName();
                }
            }
        } finally {
            // Closing the reader does not close the underlying stream.
            xr.close();
        }
    } finally {
        in.close();
    }
    return map;
}
/**
 * Parses the fixture {@code xmlDir/request.xml} with {@code p}, logs every key/value pair
 * at debug level, and checks a handful of expected element values.
 *
 * @throws Exception if the fixture cannot be read or parsed
 */
@Test
public void test() throws Exception {
    File f = new File("xmlDir/request.xml");
    Map<String,String> map = p(f);
    // Print all Key/Value pairs
    for (Map.Entry<String, String> entry : map.entrySet()) {
        logger.debug("Key: "+entry.getKey());
        logger.debug("Value: "+entry.getValue());
    }
    Assert.assertEquals(map.get("MonthlyPlanPremiumAmtPP"), "136");
    Assert.assertEquals(map.get("MonthlyAdvancedPTCAmtPP"), "125");
    Assert.assertEquals(map.get("AdjustedGrossIncomeAmt"), "22000");
    Assert.assertEquals(map.get("TotalExemptionsCnt"), "1");
}
Output:
2015-08-18 16:21:44,408 : Key: IRS1095A
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: MonthlyAdvancedPTCAmtPP
2015-08-18 16:21:44,409 : Value: 125
2015-08-18 16:21:44,409 : Key: IndividualReturnFilingStatusCd
2015-08-18 16:21:44,409 : Value: 1
2015-08-18 16:21:44,409 : Key: IRS1040
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: MonthlyPTCInformationGrpPP
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: MonthlyPremiumSLCSPAmtPP
2015-08-18 16:21:44,409 : Value: 250
2015-08-18 16:21:44,409 : Key: Filer
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: TotalPremiumSLCSPAmtPP
2015-08-18 16:21:44,409 : Value: 3000
2015-08-18 16:21:44,409 : Key: ResidentStateAbbreviationCdPP
2015-08-18 16:21:44,409 : Value: CA
2015-08-18 16:21:44,409 : Key: TotalPlanPremiumAmtPP
2015-08-18 16:21:44,409 : Value: 1632
2015-08-18 16:21:44,409 : Key: TotalExemptionsCnt
2015-08-18 16:21:44,409 : Value: 1
2015-08-18 16:21:44,409 : Key: TotalAdvancedPTCAmtPP
2015-08-18 16:21:44,409 : Value: 1500
2015-08-18 16:21:44,409 : Key: MonthlyPlanPremiumAmtPP
2015-08-18 16:21:44,409 : Value: 136
2015-08-18 16:21:44,409 : Key: RecipientSSNPP
2015-08-18 16:21:44,409 : Value: 555-11-2222
2015-08-18 16:21:44,409 : Key: WagesSalariesAndTipsAmt
2015-08-18 16:21:44,409 : Value: 22000
2015-08-18 16:21:44,409 : Key: MonthCdPP
2015-08-18 16:21:44,409 : Value: NOVEMBER
2015-08-18 16:21:44,409 : Key: ReturnData
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: PrimaryResidentStatesInfoGrpPP
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: SelfSelectPINGrp
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: ResidentStateInfoPP
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: Return
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: PrimaryBirthDt
2015-08-18 16:21:44,409 : Value: 1970-01-01
2015-08-18 16:21:44,409 : Key: ReturnHeader
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: AdjustedGrossIncomeAmt
2015-08-18 16:21:44,409 : Value: 22000
2015-08-18 16:21:44,409 : Key: PrimarySSN
2015-08-18 16:21:44,409 : Value: 555-11-2222
XML Document: request.xml
<Return xmlns="http://www.irs.gov/efile">
<ReturnData>
<IRS1095A uuid="a77f40a2-af31-4404-a27d-4c1eaad730c2">
<MonthlyPTCInformationGrpPP uuid="69dc9dd5-5415-4ee4-a199-19b2dbb701be">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthCdPP>SEPTEMBER</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="8495fa61-0e7c-45e3-8f07-9765f4ef2fc3">
<MonthCdPP>OCTOBER</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="7de1052f-6107-41da-aea4-e4495018fc80">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>APRIL</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="634d5af9-51fb-42ee-a90d-5a4f421e6854">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthCdPP>JUNE</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="a2f7de3f-650c-4a5e-b26c-30cfd7782d6c">
<MonthCdPP>MAY</MonthCdPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="a77f40a2-af31-4404-a27d-4c1eaad730c2">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthCdPP>JANUARY</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="01650aee-9d5d-4ce1-9079-ebedea3bf416">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>MARCH</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="581ba189-222d-4999-aa1a-3b290666ef5f">
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>AUGUST</MonthCdPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
</MonthlyPTCInformationGrpPP>
<TotalPremiumSLCSPAmtPP>3000</TotalPremiumSLCSPAmtPP>
<MonthlyPTCInformationGrpPP uuid="549ff57a-58dc-4365-b05c-e3e520b3e8cb">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>DECEMBER</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="195836cf-32b3-4316-99d4-6b1eab31e16d">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthCdPP>JULY</MonthCdPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="c1289d91-7ce1-41ee-9c8a-f72212e82752">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthCdPP>FEBRUARY</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<TotalAdvancedPTCAmtPP>1500</TotalAdvancedPTCAmtPP>
<RecipientSSNPP>555-11-2222</RecipientSSNPP>
<MonthlyPTCInformationGrpPP uuid="50876222-165d-442a-81e0-0b05dc3c30fb">
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthCdPP>NOVEMBER</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<TotalPlanPremiumAmtPP>1632</TotalPlanPremiumAmtPP>
</IRS1095A>
<IRS1040>
<IndividualReturnFilingStatusCd>1</IndividualReturnFilingStatusCd>
<WagesSalariesAndTipsAmt>22000</WagesSalariesAndTipsAmt>
<TotalExemptionsCnt>1</TotalExemptionsCnt>
<AdjustedGrossIncomeAmt>22000</AdjustedGrossIncomeAmt>
</IRS1040>
</ReturnData>
<ReturnHeader>
<SelfSelectPINGrp>
<PrimaryBirthDt>1970-01-01</PrimaryBirthDt>
</SelfSelectPINGrp>
<Filer>
<PrimarySSN>555-11-2222</PrimarySSN>
<PrimaryResidentStatesInfoGrpPP>
<ResidentStateInfoPP uuid="a77f40a2-af31-4404-a27d-4c1eaad730c2">
<ResidentStateAbbreviationCdPP>CA</ResidentStateAbbreviationCdPP>
</ResidentStateInfoPP>
</PrimaryResidentStatesInfoGrpPP>
</Filer>
</ReturnHeader>
</Return>