5

I am trying to use a SAX Parser to parse a XML file. Although I keep getting the error below in my code:

    Exception in thread "main" java.net.MalformedURLException: unknown protocol: c
    at java.net.URL.<init>(URL.java:592)
    at java.net.URL.<init>(URL.java:482)
   at java.net.URL.<init>(URL.java:431)
   at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(XMLEntityManager.java:605)
   at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(XMLVersionDetector.java:189)
   at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:799)
   at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:764)
   at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:123)
   at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1137)
   at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:580)
   at main.main(main.java:28)
   Java Result: 1

Here Is My handler Class:

    public class MySaxParser extends DefaultHandler {

    Index i = new Index(12);
    String bookxmlfilename;
    String tmpValue;
    BookCitation c;
    JournalArticle j;
    Unpublished u;
    ConfProceedings p;

  public MySaxParser() {
      }

    @Override
    public void startElement(String s, String s1, String elementName, Attributes attr) throws SAXException {
        if (elementName.equalsIgnoreCase("JournalArticle")) {
            if (elementName.equalsIgnoreCase("Pages")) {
                j.setstartPage(Integer.parseInt(attr.getValue("StartPage")));
                j.setendPage(Integer.parseInt(attr.getValue("EndPage")));
            }
        }
        if (elementName.equalsIgnoreCase("ConferenceProceedings")) {
            if (elementName.equalsIgnoreCase("Pages")) {
                p.setstartPage(Integer.parseInt(attr.getValue("StartPage")));
                p.setendPage(Integer.parseInt(attr.getValue("EndPage")));
            }
        }
    }

    @Override
    public void endElement(String s, String s1, String element) throws SAXException {
        if (element.equalsIgnoreCase("book")) {
            i.addCitation(c);
            if (element.equalsIgnoreCase("name")) {
                c.setName(tmpValue);
            }
            if (element.equalsIgnoreCase("publisher")) {
                c.setpublisher(tmpValue);
            }
            if (element.equalsIgnoreCase("publicationDate")) {
                c.setdateOfPublication(tmpValue);
            }
            if (element.equalsIgnoreCase("authors")) {
                if (element.equalsIgnoreCase("author")) {
                    c.addAuthor(tmpValue);
                }
            }
            if (element.equalsIgnoreCase("keywords")) {
                if (element.equalsIgnoreCase("keyword")) {
                    c.addKeyword(tmpValue);
                }
            }
        }
        if (element.equalsIgnoreCase("JournalArticle")) {
            i.addCitation(j);
            if (element.equalsIgnoreCase("name")) {
                j.setName(tmpValue);
            }
           if (element.equalsIgnoreCase("TitleOfJournal")) {
                j.settitleOfJournal(tmpValue);
            }
            if (element.equalsIgnoreCase("TitleOfJournal")) {
                j.settitleOfJournal(tmpValue);
            }
            if (element.equalsIgnoreCase("PublicationDate")) {
                j.setpublicationDate(tmpValue);
            }
            if (element.equalsIgnoreCase("volNumber")) {
                j.setvolNumber(Integer.parseInt(tmpValue));
            }
            if (element.equalsIgnoreCase("IssueNumber")) {
                j.setissueNumber(Integer.parseInt(tmpValue));
            }
            if (element.equalsIgnoreCase("authors")) {
                if (element.equalsIgnoreCase("author")) {
                    j.addAuthor(tmpValue);
                }
                if (element.equalsIgnoreCase("keywords")) {
                    if (element.equalsIgnoreCase("keyword")) {
                        j.addKeyword(tmpValue);
                    }
                }
           }
        }
        if (element.equalsIgnoreCase("Unpublished")) {
            i.addCitation(u);
            if (element.equalsIgnoreCase("name")) {
                u.setName(tmpValue);
            }
            if (element.equalsIgnoreCase("authors")) {
            if (element.equalsIgnoreCase("author")) {
                    u.addAuthor(tmpValue);
                }
                if (element.equalsIgnoreCase("keywords")) {
                    if (element.equalsIgnoreCase("keyword")) {
                        u.addKeyword(tmpValue);
                    }
                }
            }
         }

        if (element.equalsIgnoreCase("ConferenceProceedings")) {
            i.addCitation(p);
            if (element.equalsIgnoreCase("name")) {
                p.setName(tmpValue);
            }
            if (element.equalsIgnoreCase("publisher")) {
                p.setpublisher(tmpValue);
            }
            if (element.equalsIgnoreCase("ConferenceLocation")) {
                p.setlocationOfConference(tmpValue);
            }
            if (element.equalsIgnoreCase("TitleOfConferenceproceeding")) {
                p.settitleOfConferenceProc(tmpValue);
            }
            if (element.equalsIgnoreCase("ConferenceYea")) {
                p.setconfYear(Integer.parseInt(tmpValue));
            }
            if (element.equalsIgnoreCase("Editor")) {
                p.seteditor(tmpValue);
            }
            if (element.equalsIgnoreCase("authors")) {
                if (element.equalsIgnoreCase("author")) {
                    p.addAuthor(tmpValue);
                }
                if (element.equalsIgnoreCase("keywords")) {
                    if (element.equalsIgnoreCase("keyword")) {
                        p.addKeyword(tmpValue);
                    }
                }
            }
        }

        if (element.equalsIgnoreCase("FormattingStyle")) {
            i.setFormatType("IEEE");

            }
            if (element.equalsIgnoreCase("FormattingStyle")) {

            try {
                i.formatIEEE(tmpValue);
            } catch (IOException ex) {
                Logger.getLogger(MySaxParser.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
   }

    @Override
    public void characters(char[] ac, int i, int j) throws SAXException {
        tmpValue = new String(ac, i, j);
    }

}

Here is my main class:

    public class main {

     public static void main(String[] args) throws IOException,     ParserConfigurationException, SAXException {
        // Create scanner
        Scanner OswegoNote = new Scanner(System.in);
        //Create a parser factory
        SAXParserFactory factory = SAXParserFactory.newInstance();
        //Make the parser
        SAXParser saxParser = factory.newSAXParser();
        XMLReader parser = saxParser.getXMLReader();
        //Create a handler
        MySaxParser handler = new MySaxParser();
        //Tell the parser to use this handler
        parser.setContentHandler(handler);
        //read and parse the document
        parser.parse("C:\\Users\\mhromalik\\Documents\\Suny Oswego\\fall2013\\csc241fall2012\\Assignment\\MyCitation.html");
        }
}

And here is part of my XML file:

        <Citation>
        <ConferenceProceedings>
            <Name>An efficient implementation of Smith Waterman algorithm on GPU using CUDA, for massively parallel scanning of sequence databases</Name>
            <Publisher>Parallel and Distributed Processing</Publisher>
            <ConferenceLocation>Austin,TX</ConferenceLocation>
            <TitleOfConferenceproceeding> IEEE International Conference on Parallel and Distributed Processing</TitleOfConferenceproceeding>
            <ConferenceYear>2009</ConferenceYear>
            <Editor>S. M. Mann</Editor>
            <Pages StartPage="85" EndPage="102"/>
            <Authors>
                <author>L. L. Ligowski</author>
                <author>W. A. Rudnicki</author>
            </Authors>
            <Keywords>
                <Keyword>Sparse Data</Keyword>
                <Keyword>DNA</Keyword>
                <Keyword>GPU</Keyword>
                <Keyword>Data Mining</Keyword>
            </Keywords>
        </ConferenceProceedings>
    </Citation>
       <FormattingStyle>IEEE</FormattingStyle>
    <FilePath>C:\\Users\\mhromalik\\Documents\\Suny Oswego\\fall2013\\csc241fall2012\\Assignment\\MyCitation.html</FilePath>
</Index>

I can not figure out why this error is happening. Any help would be greatly appreciated!

2 Answers 2

14

You are missing the protocol when you set the path for your html file. As you are trying to read a local html file, you can use file protocol:

file:///{yourfilepath}
Sign up to request clarification or add additional context in comments.

Comments

6

parser.parse() expects a URI, not a filename. You can get from a filename to a URI in Java using

new File(filename).toURI()

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.