Recommand · September 24, 2021 0

Web Scrape pagination in a single URL (cheerio and axios)

Newbie here. I am working on a web scraping project and would like some guidance on a pagination technique. I’m scraping this site: https://www.imoney.my/unit-trust-investments. As you can see, I want to retrieve the different "Total return" percentages for each X-year period. Right now I’m using cheerio and axios.

const http = require("http");
const axios = require("axios");
const cheerio = require("cheerio");

// Minimal HTTP server on port 8080: every incoming request triggers a fresh
// fetch of the iMoney unit-trust listing page and responds with the funds
// extracted from that HTML as pretty-printed JSON.
//
// NOTE(review): only the page's initial HTML is fetched, so `totalreturn` is
// whatever period that markup contains. Selecting another period presumably
// needs a different request (the page URL does not change) — TODO confirm
// against the site's network traffic.
http
    .createServer(async function (_, res) {
        try {
            const response = await axios.get(
                "https://www.imoney.my/unit-trust-investments"
            );

            const $ = cheerio.load(response.data);

            const funds = [];
            $("[class='list-item']").each((_i, row) => {
                const $row = $(row);
                // Both the fund name and the risk score live under the title cell.
                const $title = $row.find("[class*='product-title']");

                // .text() returns the raw text content, including any whitespace
                // from the markup, so each value is trimmed.
                const fund = $title.find("a").text().trim();
                const price = $row
                    .find("[class*='is-narrow product-profit']")
                    .find("b")
                    .text()
                    .trim();
                // Drop the "/10" suffix so only the score itself remains.
                const risk = $title
                    .find("[class*='font-xsm extra-info']")
                    .text()
                    .replace('/10', '')
                    .trim();
                // Drop the percent sign so only the number remains.
                const totalreturn = $row
                    .find("[class*='product-return']")
                    .find("[class='font-lg']")
                    .find("b")
                    .text()
                    .replace('%', '')
                    .trim();

                funds.push({ fund, price, risk, totalreturn });
            });

            res.writeHead(200, { "Content-Type": "application/json; charset=utf-8" });
            res.end(JSON.stringify(funds, null, 4));
        } catch (err) {
            // Log the cause instead of discarding it, so failures can be diagnosed.
            console.error("Failed to scrape unit trust funds:", err);

            // The failure is on the server/upstream side, not the client's fault,
            // so report a 5xx rather than 400.
            if (!res.headersSent) {
                res.writeHead(500, { "Content-Type": "text/plain; charset=utf-8" });
            }
            res.end("Unable to process request.");
        }
    })
    .listen(8080);

Do note that the URL does not change when a different year is selected; only the value for total return changes.