const AP = require("article-parser");
const tempFile = require("tempfile");
const nodePandoc = require("node-pandoc-promise");
const fs = require("fs");
const { DownloaderHelper } = require("node-downloader-helper");
const path = require('path');
const slugify = require('slugify');
/**
 * Extracts the article found at `url` using article-parser.
 *
 * This is a best-effort helper: it never rejects. When extraction throws, the
 * error is reported through `console.trace` and the promise resolves to
 * `undefined`, so callers must check the result before using it.
 *
 * @param {string} url - Address of the article to extract.
 * @returns {Promise<object|undefined>} Whatever `AP.extract` resolved to, or
 *   `undefined` when extraction failed.
 */
const getArticle = async (url) => {
  let extracted;
  try {
    extracted = await AP.extract(url);
  } catch (extractionError) {
    // Report the failure but keep the promise resolved; `extracted` stays undefined.
    console.trace(extractionError);
  }
  return extracted;
};
module.exports = (url, epubPath, title, coverURL, language="en-US") => {
getArticle(url).then(res => {
title = title ? title : res.title;
epubPath = epubPath ? epubPath : slugify(path.basename(url)) + '.epub';
let date = new Date(Date.parse(res.published));
function pad(number) {
if (number < 10) {
return '0' + number;
}
return number;
}
// Using toISOString() trips Pandoc, which leaves an empty dc:date element instead.
let epubDate = date.getUTCFullYear() +
'-' + pad(date.getUTCMonth() + 1) +
'-' + pad(date.getUTCDate());
let xml = `${title}
${epubDate}
${language}
${url}
${url}
${res.description}
${res.source}
${res.author}`;
let html = tempFile(".html");
let metadata = tempFile(".xml");
fs.writeFileSync(html, res.content);
fs.writeFileSync(metadata, xml);
const imageUrl = coverURL ? coverURL : res.image;
const dl = new DownloaderHelper(imageUrl, "/tmp", {
fileName: "epub-to-image.jpg",
override: true
});
dl.start();
dl.on("end", () => {
nodePandoc(html, [
"-o",
epubPath,
`--epub-cover-image=/tmp/epub-to-image.jpg`,
`--epub-metadata=${metadata}`
])
.then(res => {
console.log(`Generated EPUB file at ${epubPath}`);
})
.catch(err => {
console.error("Oh No: ", err);
});
});
});
};