const AP = require("article-parser"); const tempFile = require("tempfile"); const nodePandoc = require("node-pandoc-promise"); const fs = require("fs"); const { DownloaderHelper } = require("node-downloader-helper"); const path = require('path'); const slugify = require('slugify'); const getArticle = async url => { try { const article = await AP.extract(url); return article; } catch (err) { console.trace(err); } }; module.exports = (url, epubPath, title, coverURL, language="en-US") => { getArticle(url).then(res => { title = title ? title : res.title; epubPath = epubPath ? epubPath : slugify(path.basename(url)) + '.epub'; let date = new Date(Date.parse(res.published)); function pad(number) { if (number < 10) { return '0' + number; } return number; } // Using toISOString() trips Pandoc, which leaves an empty dc:date element instead. let epubDate = date.getUTCFullYear() + '-' + pad(date.getUTCMonth() + 1) + '-' + pad(date.getUTCDate()); let xml = `${title} ${epubDate} ${language} ${url} ${url} ${res.description} ${res.source} ${res.author}`; let html = tempFile(".html"); let metadata = tempFile(".xml"); fs.writeFileSync(html, res.content); fs.writeFileSync(metadata, xml); const imageUrl = coverURL ? coverURL : res.image; const dl = new DownloaderHelper(imageUrl, "/tmp", { fileName: "epub-to-image.jpg", override: true }); dl.start(); dl.on("end", () => { nodePandoc(html, [ "-o", epubPath, `--epub-cover-image=/tmp/epub-to-image.jpg`, `--epub-metadata=${metadata}` ]) .then(res => { console.log(`Generated EPUB file at ${epubPath}`); }) .catch(err => { console.error("Oh No: ", err); }); }); }); };