5

I am trying to return an XML document in JSON format using a 2-tier server-side JavaScript application. I know MarkLogic can easily switch between JSON and XML formats using the REST API.

However, the following returns xml:

// Per the cts.search() options documentation, 'format-xml' only filters the
// results down to XML documents; it does not convert anything.
cts.search('something', ['format-xml']) // options may be omitted

but this returns nothing:

// 'format-json' filters the results to JSON documents and performs no
// XML-to-JSON transformation (presumably nothing is returned here because the
// matching documents are stored as XML -- confirm against the database).
cts.search('something', ['format-json'])

According to the documentation I think I have used the function correctly.

Is this possible using server side javascript?

Update:

Using transformToJsonObject as follows:

// Load MarkLogic's XQuery JSON library so it can be called from server-side JavaScript.
var json = require('/MarkLogic/json/json.xqy');
// Fetch the stored XML document by its URI.
var doc = fn.doc('/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml')
// Transform the document using the 'custom' configuration; this is the call
// that fails with XDMP-STACKOVERFLOW on this document (see the error below).
var jsonDoc = json.transformToJsonObject(doc, json.config('custom'));
// Bare final expression: the value the script evaluates to.
jsonDoc

I get the following error:

500 Internal Server Error

XDMP-STACKOVERFLOW: Stack overflow
in /MarkLogic/json/custom.xqy, at 493:23,
in json-custom:is-ignore-attribute#2(json:object(<json:object xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:json="http://marklogic.com/xdmp/json"><json:entry key="element-qname-from-json-name"><json:value xsi:t...</json:object>...XDMP-ATOMIZEFUNC: (err:FOTY0013) Functions cannot be atomized...), fn:doc("/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml")/newsitem/article/*:html/*:head/*:script[5]/*:script/*:script/*:script/*:link/*:script/*:script/*:script/*:script/*:script/*:script/*:style/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:link/*:script/*:script/*:noscript/*:link/*:meta/*:body/*:div/*:div/*:div/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:script/*:script/*:noscript/*:p/*:header/*:div/*:div/*:div/*:a/*:section/*:div/*:h2/*:ul/*:li/*:a/*:li/*:a/*:div/*:div/*:a/*:img/*:span/*:script/*:nav/*:div/*:h2/*:ul/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:span/*:div/*:a/*:form/*:div/*:label/*:div/*:script/*:div/*:div/*:div/*:div/*:div/*:a/*:svg/*:title/*:h2/*:a/*:div/*:button/*:div/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:div/*:nav/*:a/*:span/*:span/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:span/*:div/*:div/*:script/*:script/*:div/*:div/*:span/*:span/*:a/*:div/*:div/*:script/*:script/*:div/*:div/*:div/*:span/*:span/*:a/*:div/*:div/*:script/*:script/*:div/*:div/*:div/*:div/*:h1/*:div/*:ul/*:li/*:div/*:li/*:span/*:a/*:div/*:figure/*:span/*:img/*:span/*:span/*:figcaption/*:
span/*:span/*:p/*:p/*:p/*:p/*:p/*:div/*:div/*:script/*:script/*:h2/*:p/*:p/*:figure/*:span/*:div/*:span/*:span/*:figcaption/*:span/*:span/*:p/*:p/*:p/*:p/*:p/*:p/*:p/*:p/*:div/*:a/*:h2/*:a/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:div/*:div/*:div/*:h2/*:div/*:ul/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:div/*:div/*:div/*:h2/*:div/*:ul/*:li/*:div/*:div/*:a/*:div/*:span/*:p/*:div/*:div/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:h2/*:a/*:strong/*:p/*:div/*:a/*:strong/*:div/*:a/*:strong/*:div/*:div/*:div/*:script/*:script/*:div/*:div/*:h2/*:div/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:div/*:div/*:span/*:span/*:span/*:h3/@class) [1.0-ml]
$config = json:object(<json:object xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:json="http://marklogic.com/xdmp/json"><json:entry key="element-qname-from-json-name"><json:value xsi:t...</json:object>...XDMP-ATOMIZEFUNC: (err:FOTY0013) Functions cannot be atomized...)
$a = fn:doc("/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml")/newsitem/article/*:html/*:head/*:script[5]/*:script/*:script/*:script/*:link/*:script/*:script/*:script/*:script/*:script/*:script/*:style/*:script/*:script/*:script/*:script/*:scr

1 Answer 1

5

The format-xml and format-json options to cts.search() filter the search results by those formats; they don't do any transformations (see the cts.search() options documentation).

There are many ways to transform XML into JSON using MarkLogic; the simplest is probably the json XQuery library, specifically json:transform-to-json-object(). You can use that library in server-side JS like this:

// Pull MarkLogic's XQuery JSON library into server-side JavaScript.
var json = require('/MarkLogic/json/json.xqy');

// Fetch the XML document to convert.
var xmlDoc = cts.doc('/triplestore/97a5ab126bddeea0.xml');
// Build the 'custom' configuration separately so it can be adjusted before use.
var customConfig = json.config('custom');
var jsonDoc = json.transformToJsonObject(xmlDoc, customConfig);

You can use json.config() to configure and customize the transformation.

cts.search() returns an Iterator, so you'll need a for-of loop (or some kind of accumulator function) to get the actual XML documents, which you could then transform.

Update:

That error could be a bug in the JSON library, but that's a very deep HTML path, and I don't think it makes much sense to transform HTML elements into JSON object properties. Instead, we'll serialize the HTML and add the resulting string back to our JSON object.

Here's an example transformation of search results, showing how to unwrap the Iterator, customize the JSON transformations, serialize XHTML content for use within JSON, etc.

Notes:

  • this uses fn.subsequence to limit the Iterator to the first 10 results.
  • I'm serializing the excluded <html/> elements to a string (using the xpath() method of the Node object and xdmp.quote()), and adding that to the JSON object as escapedContent.

Here's the combined example; you can run this in MarkLogic QConsole:

// Transform the first 10 XML search results into plain JS objects, keeping any
// embedded XHTML as a serialized string instead of converting it element by
// element.

// Load MarkLogic's XQuery JSON library into server-side JavaScript.
var json = require('/MarkLogic/json/json.xqy');
// Start from the 'custom' configuration so the transformation can be tuned below.
var conf = json.config('custom');

var htmlNs = 'http://www.w3.org/1999/xhtml';

// Exclude <html:html/> elements from the XML-to-JSON transformation; they are
// serialized to a string separately inside the loop below.
//
// Note: this is a little awkward because the JSON library is XQuery
// and requires an XDM sequence, not an Array
conf['ignore-element-names'] = json.arrayValues([
  fn.QName(htmlNs, 'html')
]);

// cts.search() returns an Iterator; fn.subsequence() limits it to the first
// 10 results. 'format-xml' restricts the results to XML documents.
// NOTE(review): cts.andQuery(null) is presumably a match-everything query --
// confirm against the cts.andQuery documentation.
var results = fn.subsequence(
  cts.search(cts.andQuery(null), 'format-xml'),
  1,
  10
);

var transformedResults = [];

for (var result of results) {
  // transformToJson() returns an object-node() wrapped in a document-node()
  // convert it to a regular JS object
  var transformed = json.transformToJson(result, conf).toObject();

  // Serialize the excluded XHTML to a string and attach it to the object, so
  // the markup travels as one JSON string value.
  transformed.escapedContent = xdmp.quote(
    result.xpath('.//html:html', { html: htmlNs })
  );

  transformedResults.push(transformed);
}

// Bare final expression: the value the script evaluates to.
transformedResults;
Sign up to request clarification or add additional context in comments.

6 Comments

Thanks for answering! I have tried it, but it does not work for me yet. Funnily enough, I keep getting an XDMP-STACKOVERFLOW error. It's probably something to do with my data, though. I will keep trying, and if I get it working I'll accept.
Can you update your answer and post the full error message? Also, I've updated my answer with a search iteration example.
I have updated the question with the error. FYI, the XML document contains one child which has "tidy" HTML in the body.
Wow, that's a crazy deep path. You probably want that HTML to be a string, not each element as a JSON property. I'll update my answer.
This is an extremely helpful answer in every way! I can't thank you enough.
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.