I am using the IRS -900 tax file
https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml
to create a single table containing all elements, attributes with their associated values using SQL OPENXML. I have built the query just to see if I can get few result as shown below. But I only get an empty table.
I also tried to use online utility to create xpath reference or the XML tree of the document to identify the elements and attributes in this long XML file.
Please suggest any easy tool to list all elements and attributes easily as I think the xpath reference is the issue.
Here is my code
--created a table for the xml document inside sql server --Example XML: https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml
USE [IRS-900]
GO
CREATE TABLE [200931393493000150_public] (
Id INT IDENTITY PRIMARY KEY
,XMLData XML
,LoadedDateTime DATETIME
)
INSERT INTO [200931393493000150_public] (
XMLData
,LoadedDateTime
)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn
,GETDATE()
FROM OPENROWSET(BULK 'C:\Users\200931393493000150_public.xml', SINGLE_BLOB) AS x;
--select * from [dbo].[200931393493000150_public]
DECLARE @x XML -- to load the binary file to a readable xml document in sql
SELECT @x = p
FROM
-- to load the document I used openrowset
openrowset(BULK 'C:\Users\200931393493000150_public.xml', single_blob) AS [200931393493000150_public](p)
DECLARE @hdoc INT -- out put parameter
EXEC sp_xml_preparedocument @hDoc OUTPUT
,@x
SELECT *
--into table x - will load the result into sql table on the fly
-- xpath to the element , 1 is for attributes and 2 for elements
--from openxml(@hdoc, '//*',1)
--from openxml(@hdoc, '/root//*',1)
--from openxml(@hdoc, '/root//*',1)
--with specifies columns to be retreived
FROM openxml(@hdoc, '/root/returnheader/Filer/USAddress', 2)
-- flag 1 for attributes
-- flag 2 for elements
WITH (
EIN CHAR(10) '/../../@EIN'
,-- to refer the xpath for the attribute
AddressLine1 VARCHAR(50)
,AddressLine2 INT
,City VARCHAR(50)
,STATE VARCHAR(50)
,ZIPCode CHAR(5)
)
--catch release
EXEC sp_xml_removedocument @hDoc
GO
-- I have got empty table with the corresponding 6 columns