0

I want to split an XML file into separate XML fragments by reading the `<staff>` and `</staff>` tags. I split the XML using StringUtils.substringsBetween, but I am able to get only two separate fragments from the file: the third fragment is appended to the second.

Program:

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

public class SBuff {

    private BufferedReader br;

    public SBuff() {
        // TODO Auto-generated constructor stub
    }

    public static void main(String[] args) throws IOException {

         String data = "<?xml version=\"1.0\"?>\r\n" + 
                "<company>\r\n" + 
                "    <staff>\r\n" + 
                "        <firstname>yong</firstname>\r\n" + 
                "        <lastname>mook kim</lastname>\r\n" + 
                "        <nickname>mkyong</nickname>\r\n" + 
                "        <salary>100000</salary>\r\n" + 
                "    </staff>\r\n" + 
                "    <staff>\r\n" + 
                "        <firstname>low</firstname>\r\n" + 
                "        <lastname>yin fong</lastname>\r\n" + 
                "        <nickname>fong fong</nickname>\r\n" + 
                "        <salary>200000</salary>\r\n" + 
                "    </staff>\r\n" + 
                "    <staff>\r\n" + 
                "        <firstname>low</firstname>\r\n" + 
                "        <lastname>yin fong</lastname>\r\n" + 
                "        <nickname>fong fong</nickname>\r\n" + 
                "        <salary>200000</salary>\r\n" + 
                "    </staff>\r\n" + 
                "</company>\r\n"+
                "</xml>";

        SBuff s = new SBuff();
        s.loadData(data);
    }

    public void loadData(String stream) throws IOException {
        String[] list = StringUtils.substringsBetween(stream,
                "<staff">", "</staff>");

        StringBuilder stringBuilder = new StringBuilder();
        for (String s : list) {

            stringBuilder.append("<staff>");
            stringBuilder.append(s);
            stringBuilder.append("</staff>");
            System.out.println("##################################");
            System.out.println(stringBuilder.toString());
            System.out.println("##################################");
        }

    }

    public void display(String data) {
        System.out.println("Buffer Data: " + data);
    }
}

Output:

Expected:

##################################
<staff> 
   <firstname>yong</firstname> 
   <lastname>mook kim</lastname>
   <nickname>mkyong</nickname> 
   <salary>100000</salary>
</staff>
##################################
<staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff>
##################################
<staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff>
##################################


Actual:

##################################
<staff> 
   <firstname>yong</firstname> 
   <lastname>mook kim</lastname>
   <nickname>mkyong</nickname> 
   <salary>100000</salary>
</staff>
##################################
<staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff><staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff>
##################################

I tried several different approaches, but none of them seems to work. Thanks in advance.

2
  • 3
    Never process XML with string manipulation tools unless you know exactly what you are doing. Parse the XML string with an XML parser and make your changes at the XML element level. Commented Jun 13, 2019 at 12:37
  • manipulation of serialized data of any kind using regex, String#split etc is always a bad idea Commented Jun 13, 2019 at 12:59

2 Answers 2

1

You can use an XML parser such as jsoup.

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.12.1</version>
</dependency>

Parse and splitting:

 // Parse the XML string with jsoup's XML parser; the empty string is passed as the base URI.
 Document doc = Jsoup.parse(data, "", Parser.xmlParser());
 // Visit every element with the tag name "staff" and print it, followed by a separator line.
 for(Element staffElement: doc.getElementsByTag("staff")) {
   System.out.println(staffElement);
   System.out.println("##################################");
 }
Sign up to request clarification or add additional context in comments.

Comments

0

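To get the expected output, build each fragment on its own instead of appending every fragment to one shared StringBuilder. Your loadData function would be: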

/**
 * Prints every {@code <staff>...</staff>} fragment found in the given XML text.
 *
 * <p>A separator line is printed once before the first fragment and once after
 * each fragment. Each fragment is assembled independently, so a printed block
 * contains exactly one {@code <staff>} element.
 *
 * @param stream the XML text to search
 * @throws IOException declared to keep the signature unchanged; nothing in this
 *         body throws it
 */
public void loadData(String stream) throws IOException {
    final String separator = "##################################";
    String[] list = StringUtils.substringsBetween(stream, "<staff>", "</staff>");

    System.out.println(separator);
    if (list == null) {
        // substringsBetween returns null when nothing matches; without this
        // guard the loop below would throw a NullPointerException.
        return;
    }
    for (String s : list) {
        // Re-wrap the inner text in the tags that substringsBetween strips off.
        System.out.println("<staff>" + s + "</staff>");
        System.out.println(separator);
    }
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.