From deca37893735a9c6e79e050a238850d4be58c344 Mon Sep 17 00:00:00 2001 From: Nemo Date: Wed, 21 Jul 2021 13:20:19 +0530 Subject: [PATCH] Parse durations correctly --- src/parser.js | 161 +++++++++++++++++++++++++++++++------------- test/parser_test.js | 29 ++++++-- 2 files changed, 137 insertions(+), 53 deletions(-) diff --git a/src/parser.js b/src/parser.js index 10b9c09..2ab6f0d 100644 --- a/src/parser.js +++ b/src/parser.js @@ -21,93 +21,160 @@ * It is suggested to check their lengths and pick one to parse as the Track Title */ const TS_REGEX = /^((?<trackl>\d{1,3})\.)? *(?<text_1>.*?) *[\(\[]?(?<start_ts>((?<start_hh>\d{1,2}):)?(?<start_mm>\d{1,2}):(?<start_ss>\d{1,2})) *-? *[\)\]]?(?<end_ts>(?<end_hh>\d{1,2}:)?(?<end_mm>\d{1,2}):(?<end_ss>\d{1,2}))? *((?<trackr>\d{1,3})\.)? *(?<text_2>.*?)$/; -import getArtistTitle from 'get-artist-title' +import getArtistTitle from "get-artist-title"; var _options = {}; -function convertTime(h,m,s) { - return (+h) * 60 * 60 + (+m) * 60 + (+s) +// Returns number of total seconds +function convertTime(h, m, s) { + return +h * 60 * 60 + +m * 60 + +s; } +// Only picks out lines which have a timestamp in them var filterTimestamp = function(line) { - return TS_REGEX.test(line) + return TS_REGEX.test(line); }; +// Parse each line as per the regex var firstPass = function(line) { let matches = line.match(TS_REGEX); - let track = matches.groups['trackl'] ? +matches.groups['trackl'] : (matches.groups['trackr'] ? +matches.groups['trackr'] : null) + let track = matches.groups["trackl"] + ? +matches.groups["trackl"] + : matches.groups["trackr"] + ? +matches.groups["trackr"] + : null; return { track: track, start: { - ts: matches.groups['start_ts'].length<6 ? `00:${matches.groups['start_ts']}` : matches.groups['start_ts'], - hh: matches.groups['start_hh'] ? +matches.groups['start_hh'] : 0, + ts: + matches.groups["start_ts"].length < 6 + ? `00:${matches.groups["start_ts"]}` + : matches.groups["start_ts"], + hh: matches.groups["start_hh"] ? 
+matches.groups["start_hh"] : 0, // These 2 are always set - mm: +matches.groups['start_mm'], - ss: +matches.groups['start_ss'], + mm: +matches.groups["start_mm"], + ss: +matches.groups["start_ss"], }, - end: (matches.groups['end_ts']!==undefined ? { - ts: matches.groups['end_ts']? matches.groups['end_ts'] : null, - hh: matches.groups['end_hh']? +matches.groups['end_hh'] : null, - mm: matches.groups['end_mm']? +matches.groups['end_mm'] : null, - ss: matches.groups['end_ss']? +matches.groups['end_ss'] : null, - } : null), + end: + matches.groups["end_ts"] !== undefined + ? { + ts: matches.groups["end_ts"] ? matches.groups["end_ts"] : null, + hh: matches.groups["end_hh"] ? +matches.groups["end_hh"] : null, + mm: matches.groups["end_mm"] ? +matches.groups["end_mm"] : null, + ss: matches.groups["end_ss"] ? +matches.groups["end_ss"] : null, + } + : null, _: { - left_text: matches.groups['text_1'], - right_text: matches.groups['text_2'] - } - } + left_text: matches.groups["text_1"], + right_text: matches.groups["text_2"], + }, + }; }; +// Add a calc attribute with total seconds var calcTimestamp = function(obj) { - if(obj.end) { - obj.end.calc = convertTime(obj.end.hh,obj.end.mm,obj.end.ss) + if (obj.end) { + obj.end.calc = convertTime(obj.end.hh, obj.end.mm, obj.end.ss); } - obj.start.calc = convertTime(obj.start.hh,obj.start.mm,obj.start.ss) - return obj -} + obj.start.calc = convertTime(obj.start.hh, obj.start.mm, obj.start.ss); + return obj; +}; +// Pick the longer "text" from left or right side. var parseTitle = function(obj) { - obj.title = obj._.left_text.length > obj._.right_text.length - ? obj._.left_text : obj._.right_text; - return obj -} + obj.title = + obj._.left_text.length > obj._.right_text.length + ? 
obj._.left_text + : obj._.right_text; + return obj; +}; +// Parse the text as the title/artist var parseArtist = function(obj) { let [artist, title] = getArtistTitle(obj.title, { defaultArtist: _options.artist, - defaultTitle: obj.title + defaultTitle: obj.title, }); - obj.artist = artist - obj.title = title - return obj + obj.artist = artist; + obj.title = title; + return obj; }; +// If track numbers are not present, add them accordingly var addTrack = function(obj, index) { - if (obj.track==null) { - obj.track = index+1 + if (obj.track == null) { + obj.track = index + 1; } - return obj -} + return obj; +}; +// Add "end" timestamps as next start timestamps var addEnd = function(obj, index, arr) { if (!obj.end) { - if(arr.length!=index+1) { - let next = arr[index+1] - obj.end = next.start - return obj + if (arr.length != index + 1) { + let next = arr[index + 1]; + obj.end = next.start; + return obj; } } - return obj -} + return obj; +}; -export function parse (text, options = { artist: 'Unknown' }) { +var timeToObject = function(obj) { + let d = new Date(obj.calc * 1000).toISOString(); + obj.hh = +d.substr(11, 2); + obj.mm = +d.substr(14, 2); + obj.ss = +d.substr(17, 2); + obj.ts = d.substr(11, 8); + return obj; +}; + +var fixDurations = function(list) { + for (let i in list) { + if (i == 0) { + // Set the first one to start of track. 
+ list[i].start.hh = list[i].start.mm = list[i].start.ss = 0; + // And end at the right time + list[i].end = { calc: list[i].start.calc }; + list[i].start.calc = 0 + } else { + // All the other tracks start at the end of the previous one + // And end at start time + duration + let previous = list[i - 1]; + list[i].end = { calc: previous.end.calc + list[i].start.calc }; + list[i].start.calc = previous.end.calc; + } + + list[i].start = timeToObject(list[i].start); + list[i].end = timeToObject(list[i].end); + } +}; + +export function parse(text, options = { artist: "Unknown" }) { _options = options; - return text - .split('\n') + let durations = false; + let result = text + .split("\n") .filter(filterTimestamp) .map(firstPass) - .map(calcTimestamp) + .map(calcTimestamp); + + result.forEach((current, index, list) => { + if (index > 0) { + let previous = list[index - 1]; + if (current.start.calc < previous.start.calc) { + durations = true; + } + } + }); + + if (durations) { + // console.error("Detected durations instead of timestamps. \nIf this is incorrect, pass the --timestamps-only flag and create an issue at \nhttps://github.com/captn3m0/youtube-cue/issues/new/choose") + fixDurations(result); + } + + return result .map(parseTitle) .map(parseArtist) .map(addTrack) - .map(addEnd) + .map(addEnd); } diff --git a/test/parser_test.js b/test/parser_test.js index 67ef90f..eaa875f 100644 --- a/test/parser_test.js +++ b/test/parser_test.js @@ -5,10 +5,10 @@ import { parse } from "../src/parser.js"; const TEXT = ` 00:40 The Coders - Hello World -12:23 This is not the end +1:00 This is not the end Something else in the middle -1:23:11 Not the last song -01. Screens 0:00 - 5:40 +1:23 Not the last song +01. Screens 1:40 - 5:40 02. Inharmonious Slog 5:40 - 10:11 03. The Everyday Push 10:11 - 15:46 04. Storm 15:46 - 19:07 @@ -77,9 +77,9 @@ describe("Parser", function() { }); it("should parse durations when given", function() { - let result = parse(`1. Artist - Title - 6:19 -2. 
Another Artist - Another Title - 6:59 -3. Yet Another Artist - Yet another title - 5:12`) + let result = parse(`1. Artist - Title 6:19 +2. Another Artist - Another Title 6:59 +3. Yet Another Artist - Yet another title 5:12`) assert.deepEqual(result[0], { artist: "Artist", title: "Title", @@ -88,6 +88,23 @@ describe("Parser", function() { end: { ts: "00:06:19", hh: 0, mm: 6, ss: 19, calc: 379 }, _: { left_text: "Artist - Title", right_text: "" }, }) + + assert.deepEqual(result[1], { + artist: "Another Artist", + title: "Another Title", + track: 2, + start: { ts: "00:06:19", hh: 0, mm: 6, ss: 19, calc: 379 }, + end: { ts: "00:13:18", hh: 0, mm: 13, ss: 18, calc: 798 }, + _: { left_text: "Another Artist - Another Title", right_text: "" }, + }) + assert.deepEqual(result[2], { + artist: "Yet Another Artist", + title: "Yet another title", + track: 3, + start: { ts: "00:13:18", hh: 0, mm: 13, ss: 18, calc: 798 }, + end: { ts: "00:18:30", hh: 0, mm: 18, ss: 30, calc: 1110 }, + _: { left_text: "Yet Another Artist - Yet another title", right_text: "" }, + }) }); it("should parse taylor swift", function() {