Parse durations correctly

2021-07-21 13:20:19 +05:30 · 2021-07-21 13:20:19 +05:30 · deca378937
parent 3fdb7739ff
commit deca378937
2 changed files with 137 additions and 53 deletions
--- a/src/parser.js
+++ b/src/parser.js
@ -21,93 +21,160 @@
 * It is suggested to check their lengths and pick one to parse as the Track Title
 */
 const TS_REGEX = /^((?<trackl>\d{1,3})\.)? *(?<text_1>.*?) *[\(\[]?(?<start_ts>((?<start_hh>\d{1,2}):)?(?<start_mm>\d{1,2}):(?<start_ss>\d{1,2})) *-? *[\)\]]?(?<end_ts>(?<end_hh>\d{1,2}:)?(?<end_mm>\d{1,2}):(?<end_ss>\d{1,2}))? *((?<trackr>\d{1,3})\.)? *(?<text_2>.*?)$/;
-import getArtistTitle from 'get-artist-title'
+import getArtistTitle from "get-artist-title";
 var _options = {};

-function convertTime(h,m,s) {
-  return (+h) * 60 * 60 + (+m) * 60 + (+s)
+// Returns number of total seconds
+function convertTime(h, m, s) {
+  return +h * 60 * 60 + +m * 60 + +s;
 }

+// Only picks out lines which have a timestamp in them
 var filterTimestamp = function(line) {
-  return TS_REGEX.test(line)
+  return TS_REGEX.test(line);
 };

+// Parse each line as per the regex
 var firstPass = function(line) {
  let matches = line.match(TS_REGEX);
-  let track = matches.groups['trackl'] ? +matches.groups['trackl'] : (matches.groups['trackr'] ? +matches.groups['trackr'] : null)
+  let track = matches.groups["trackl"]
+    ? +matches.groups["trackl"]
+    : matches.groups["trackr"]
+    ? +matches.groups["trackr"]
+    : null;
  return {
    track: track,
    start: {
-      ts: matches.groups['start_ts'].length<6 ? `00:${matches.groups['start_ts']}` : matches.groups['start_ts'],
-      hh: matches.groups['start_hh'] ? +matches.groups['start_hh'] : 0,
+      ts:
+        matches.groups["start_ts"].length < 6
+          ? `00:${matches.groups["start_ts"]}`
+          : matches.groups["start_ts"],
+      hh: matches.groups["start_hh"] ? +matches.groups["start_hh"] : 0,
      // These 2 are always set
-      mm: +matches.groups['start_mm'],
-      ss: +matches.groups['start_ss'],
+      mm: +matches.groups["start_mm"],
+      ss: +matches.groups["start_ss"],
    },
-    end: (matches.groups['end_ts']!==undefined ? {
-          ts: matches.groups['end_ts']? matches.groups['end_ts'] : null,
-          hh: matches.groups['end_hh']? +matches.groups['end_hh'] : null,
-          mm: matches.groups['end_mm']? +matches.groups['end_mm'] : null,
-          ss: matches.groups['end_ss']? +matches.groups['end_ss'] : null,
-        } : null),
+    end:
+      matches.groups["end_ts"] !== undefined
+        ? {
+            ts: matches.groups["end_ts"] ? matches.groups["end_ts"] : null,
+            hh: matches.groups["end_hh"] ? +matches.groups["end_hh"] : null,
+            mm: matches.groups["end_mm"] ? +matches.groups["end_mm"] : null,
+            ss: matches.groups["end_ss"] ? +matches.groups["end_ss"] : null,
+          }
+        : null,
    _: {
-      left_text: matches.groups['text_1'],
-      right_text: matches.groups['text_2']
-    }
-  }
+      left_text: matches.groups["text_1"],
+      right_text: matches.groups["text_2"],
+    },
+  };
 };

+// Add a calc attribute with total seconds
 var calcTimestamp = function(obj) {
-  if(obj.end) {
-    obj.end.calc = convertTime(obj.end.hh,obj.end.mm,obj.end.ss)
+  if (obj.end) {
+    obj.end.calc = convertTime(obj.end.hh, obj.end.mm, obj.end.ss);
  }
-  obj.start.calc = convertTime(obj.start.hh,obj.start.mm,obj.start.ss)
-  return obj
-}
+  obj.start.calc = convertTime(obj.start.hh, obj.start.mm, obj.start.ss);
+  return obj;
+};

+// Pick the longer "text" from left or right side.
 var parseTitle = function(obj) {
-  obj.title = obj._.left_text.length > obj._.right_text.length
-    ? obj._.left_text : obj._.right_text;
-  return obj
-}
+  obj.title =
+    obj._.left_text.length > obj._.right_text.length
+      ? obj._.left_text
+      : obj._.right_text;
+  return obj;
+};

+// Parse the text as the title/artist
 var parseArtist = function(obj) {
  let [artist, title] = getArtistTitle(obj.title, {
    defaultArtist: _options.artist,
-    defaultTitle: obj.title
+    defaultTitle: obj.title,
  });
-  obj.artist = artist
-  obj.title = title
-  return obj
+  obj.artist = artist;
+  obj.title = title;
+  return obj;
 };

+// If track numbers are not present, add them accordingly
 var addTrack = function(obj, index) {
-  if (obj.track==null) {
-    obj.track = index+1
+  if (obj.track == null) {
+    obj.track = index + 1;
  }
-  return obj
-}
+  return obj;
+};

+// Add "end" timestamps as next start timestamps
 var addEnd = function(obj, index, arr) {
  if (!obj.end) {
-    if(arr.length!=index+1) {
-      let next = arr[index+1]
-      obj.end = next.start
-      return obj
+    if (arr.length != index + 1) {
+      let next = arr[index + 1];
+      obj.end = next.start;
+      return obj;
    }
  }
-  return obj
-}
+  return obj;
+};

-export function parse (text, options = { artist: 'Unknown' }) {
+/**
+ * Rebuilds the hh/mm/ss/ts fields of a timestamp object from its `calc`
+ * value (total seconds). The object is mutated in place and returned.
+ *
+ * Plain arithmetic is used instead of formatting through a Date so that
+ * totals of 24 hours or more keep their real hour count rather than
+ * wrapping around to 00.
+ *
+ * @param {{calc: number}} obj - timestamp object carrying total seconds
+ * @returns {object} the same object with hh, mm, ss (numbers) and
+ *   ts ("HH:MM:SS", zero-padded) set
+ */
+var timeToObject = function(obj) {
+  const total = Math.floor(obj.calc);
+  const pad = (n) => String(n).padStart(2, "0");
+  obj.hh = Math.floor(total / 3600);
+  obj.mm = Math.floor((total % 3600) / 60);
+  obj.ss = total % 60;
+  obj.ts = `${pad(obj.hh)}:${pad(obj.mm)}:${pad(obj.ss)}`;
+  return obj;
+};
+
+/**
+ * Reinterprets the parsed "timestamps" as per-track durations and rewrites
+ * every entry's start/end in place so the tracks are laid back to back.
+ *
+ * On entry each `start.calc` holds that track's duration in seconds. The
+ * first track is moved to 0 and ends after its own duration; every later
+ * track starts where the previous one ended and ends one duration later.
+ *
+ * @param {Array<object>} list - parsed entries, each with a `start.calc`
+ * @returns {undefined} nothing; `list` entries are mutated in place
+ */
+var fixDurations = function(list) {
+  // Running end time (in seconds) of the most recently handled track.
+  let elapsed = 0;
+  // for...of instead of for...in: for...in yields string keys and would
+  // also visit any extra enumerable property present on the array.
+  for (const item of list) {
+    const duration = item.start.calc;
+    item.start.calc = elapsed;
+    elapsed += duration;
+    item.end = { calc: elapsed };
+
+    // Refresh hh/mm/ss/ts so they agree with the recomputed calc values.
+    item.start = timeToObject(item.start);
+    item.end = timeToObject(item.end);
+  }
+};
+
+export function parse(text, options = { artist: "Unknown" }) {
  _options = options;
-  return text
-    .split('\n')
+  let durations = false;
+  let result = text
+    .split("\n")
    .filter(filterTimestamp)
    .map(firstPass)
-    .map(calcTimestamp)
+    .map(calcTimestamp);
+
+  result.forEach((current, index, list) => {
+    if (index > 0) {
+      let previous = list[index - 1];
+      if (current.start.calc < previous.start.calc) {
+        durations = true;
+      }
+    }
+  });
+
+  if (durations) {
+    // console.error("Detected durations instead of timestamps. \nIf this is incorrect, pass the --timestamps-only flag and create an issue at \nhttps://github.com/captn3m0/youtube-cue/issues/new/choose")
+    fixDurations(result);
+  }
+
+  return result
    .map(parseTitle)
    .map(parseArtist)
    .map(addTrack)
-    .map(addEnd)
+    .map(addEnd);
 }
--- a/test/parser_test.js
+++ b/test/parser_test.js
@ -5,10 +5,10 @@ import { parse } from "../src/parser.js";

 const TEXT = `
 00:40 The Coders - Hello World
-12:23 This is not the end
+1:00 This is not the end
 Something else in the middle
-1:23:11 Not the last song
-01.   Screens     0:00 - 5:40
+1:23 Not the last song
+01.   Screens     1:40 - 5:40
 02.   Inharmonious Slog     5:40 - 10:11
 03.   The Everyday Push     10:11 - 15:46
 04.   Storm     15:46 - 19:07
@ -77,9 +77,9 @@ describe("Parser", function() {
  });

  it("should parse durations when given", function() {
-    let result = parse(`1. Artist - Title - 6:19
-2. Another Artist - Another Title - 6:59
-3. Yet Another Artist - Yet another title - 5:12`)
+    let result = parse(`1. Artist - Title 6:19
+2. Another Artist - Another Title 6:59
+3. Yet Another Artist - Yet another title 5:12`)
    assert.deepEqual(result[0], {
        artist: "Artist",
        title: "Title",
@ -88,6 +88,23 @@ describe("Parser", function() {
        end: { ts: "00:06:19", hh: 0, mm: 6, ss: 19, calc: 379 },
        _: { left_text: "Artist - Title", right_text: "" },
      })
+
+    assert.deepEqual(result[1], {
+        artist: "Another Artist",
+        title: "Another Title",
+        track: 2,
+        start: { ts: "00:06:19", hh: 0, mm: 6, ss: 19, calc: 379 },
+        end: { ts: "00:13:18", hh: 0, mm: 13, ss: 18, calc: 798 },
+        _: { left_text: "Another Artist - Another Title", right_text: "" },
+      })
+    assert.deepEqual(result[2], {
+        artist: "Yet Another Artist",
+        title: "Yet another title",
+        track: 3,
+        start: { ts: "00:13:18", hh: 0, mm: 13, ss: 18, calc: 798 },
+        end: { ts: "00:18:30", hh: 0, mm: 18, ss: 30, calc: 1110 },
+        _: { left_text: "Yet Another Artist - Yet another title", right_text: "" },
+      })
  });

  it("should parse taylor swift", function() {