0

I want to parse HTML table values into a JSON object using CheerioJS.

I'm struggling to get to grips with the Cheerio API, and so far I have only come up with an awfully convoluted solution.

I am trying to parse this HTML:

    <table summary="Account summary" class="accounts-table">
    <thead>
        <tr>
            <th>Accounts</th>
            <th>Total value</th>
            <th>Available</th>
            <th>Actions</th>
        </tr>
    </thead>
    <tfoot>
        <tr>
            <td>
                Total
            </td>
            <td>
                £TOTALAMOUNT
            </td>
            <td>
                £CASH
            </td>
            <td></td>
        </tr>
    </tfoot>
    <tbody>

        <tr>
            <td style="white-space: normal">
                <a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA"
                    class="product-name">
                    Stocks ISA
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA">
                    ISA-VAL
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/amount_available/account/22"
                    title="View cash summary for your Stocks ISA">
                    ISA-CASH
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/topup/account/22" title="Top up your Stocks ISA"
                    class="top-up-button">
                    Top up
                </a>
                <a href="https://awebsitehere.co.uk/topup/account/22l" title="Place a deal in your Stocks  ISA"
                    class="deal-button">
                </a>
            </td>
        </tr>

        <tr>
            <td style="white-space: normal">
                <a href="https://awebsitehere.co.uk/account_summary/account/26" title="View your Junior ISA"
                    class="product-name">
                    Junior ISA
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/26"
                    title="View your Junior ISA">
                    JUNIOR-VAL
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/26"
                    title="View cash summary for your Junior ISA">
                    JUNIOR-CASH
                </a>
            </td>
            <td>
            </td>
        </tr>

        <tr>
            <td style="white-space: normal">
                <a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
                    title="View your Stocks Account" class="product-name">
                    Stocks Account
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
                    title="View your Stocks Account">
                    STOCKS-VAL
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/98"
                    title="View cash summary for your stocks Account">
                    STOCKS-CASH
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/stock_and_fund_search/account/98/action/deal"
                    title="Place a deal in your stocks Account" class="deal-button">
                    <span style="padding-right:8px;">Deal</span>
                </a>
            </td>
        </tr>
    </tbody>
</table>

Into a JSON object that looks like this:

{
    "accounts": {
        "Stocks ISA": {
            "investments": "",
            "total value": "isa-val",
            "cash": "isa-cash",
            "link": "the href attached to this account"
        },
        "Junior ISA": {
            "investments": "",
            "total value": "junior-val",
            "cash": "junior-cash",
            "link": "the href attached to this account"
        },
        "Stocks Account": {
            "investments": "",
            "total value": "stocks-val",
            "cash": "stocks-cash",
            "link": "the href attached to this account"
        }
    }
}

This is the code that I've attempted so far, but I'm struggling to get my head around traversing the HTML.

// Load the page markup into Cheerio so it can be queried with CSS selectors.
// NOTE(review): `body` is presumably the fetched HTML string — it is defined outside this snippet.
const $ = cheerio.load(body)

// Visit every row (<tr>) inside the <tbody> of the table whose class attribute is exactly "accounts-table".
$('table[class="accounts-table"] tbody tr').each(
    function (i, element) {
        // Direct children of the current row; in the sample markup these are its <td> cells.
        //@ts-ignore
        let children = $(this).children()
        children.each(
            function (i, elem) {
                // Trimmed, combined text of the cell's child elements.
                // The value is assigned to a local (shadowing the outer `children`) and never
                // stored or returned, so this traversal currently produces no result.
                //@ts-ignore
                let children = $(this).children().text().trim()
            }
        )
    }
)

I would be very grateful to anyone who could point me in the right direction!

1 Answer 1

1

This ought to parse the HTML into the structure you're looking for:

// Build the Cheerio query function for the page markup; `parseAccountRow` and the row loop below both use it.
// NOTE(review): `body` is presumably the fetched HTML string — it is defined outside this snippet.
const $ = cheerio.load(body)

/**
 * Convert one account row (<tr>) of the accounts table into a name/value pair.
 *
 * The trimmed text of every link (<a>) found inside the row's cells is collected in
 * document order: the first text is used as the account name, the second as the
 * total value and the third as the available cash (both lower-cased). The row's
 * link is the first non-empty href found among those links.
 *
 * @param {*} elem - The row element to search; passed to `$` as the selection context.
 * @returns {{name: (string|undefined), value: {investments: string, "total value": string, cash: string, link: (string|null)}}}
 *   `name` is undefined when the row contains no links. A missing total value or
 *   cash amount is reported as "" (not the string "undefined"), and `link` is null
 *   when no link in the row carries an href.
 */
function parseAccountRow(elem) {
    const linkTexts = [];
    let href = null;

    $("td a", elem).each((n, link) => {
        linkTexts.push($(link).text().trim());
        // Keep the first usable href only; in the sample table the later links of a row
        // point at other pages (cash summary, top up, deal).
        if (!href && link.attribs && link.attribs.href) {
            href = link.attribs.href;
        }
    });

    // Concatenating undefined with "" yields "undefined", so rows with fewer than
    // three links fall back to an empty string instead.
    const lowerCasedTextAt = (index) =>
        (linkTexts[index] === undefined ? "" : linkTexts[index].toLowerCase());

    return {
        name: linkTexts[0],
        value: {
            "investments": "",
            "total value": lowerCasedTextAt(1),
            cash: lowerCasedTextAt(2),
            link: href,
        },
    };
}

// Accumulates one entry per account row, keyed by the account name.
let parsedObj = {};

// Walk the body rows of the accounts table and record each parsed row under its name.
$('table[class="accounts-table"] tbody tr').each(function collectAccount(index, rowElem) {
    const { name, value } = parseAccountRow(rowElem);
    parsedObj[name] = value;
});

// Print the assembled object.
console.log("Result:", parsedObj);

I'm getting the result below:

{
    "Stocks ISA": {
        "investments": "",
        "total value": "isa-val",
        "cash": "isa-cash",
        "link": "https://awebsitehere.co.uk/account_summary/account/22"
    },
    "Junior ISA": {
        "investments": "",
        "total value": "junior-val",
        "cash": "junior-cash",
        "link": "https://awebsitehere.co.uk/account_summary/account/26"
    },
    "Stocks Account": {
        "investments": "",
        "total value": "stocks-val",
        "cash": "stocks-cash",
        "link": "https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
    }
}
Sign up to request clarification or add additional context in comments.

2 Comments

Thank you very much for this. A spot on answer, very clear, and has helped me with what selectors I should have used.
Cool, glad to be of help. I'm sure you can improve on it in any case, but I hope it will get you going!

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.