2020-06-30 09:49:12 +00:00
|
|
|
# Load the sibling source file; a bare string literal here would be a no-op.
require "./thing.cr"
|
2020-04-07 20:18:48 +00:00
|
|
|
require "./fetch.cr"
|
|
|
|
require "./article.cr"
|
2020-04-07 19:08:31 +00:00
|
|
|
|
|
|
|
module Muse::Dl
  # A single issue page, addressed as `https://muse.jhu.edu/issue/<id>`,
  # together with the metadata and article list scraped from its HTML.
  #
  # Metadata extraction is delegated to `InfoParser` (defined elsewhere);
  # this class only stores the results and derives volume/number/date from
  # the issue title.
  class Issue
    getter id : String,
      title : String | Nil,
      articles : Array(Muse::Dl::Article),
      url : String,
      summary : String | Nil,
      publisher : String | Nil,
      info : Hash(String, String),
      volume : String | Nil,
      number : String | Nil,
      date : String | Nil,
      journal_title : String | Nil

    # The journal title may be supplied by the caller; `parse_contents` only
    # scrapes it from the page when it is still unset.
    setter :journal_title

    # Creates the issue identified by *id*.
    #
    # When *response* (the issue page HTML) is given it is parsed right away.
    # Otherwise nothing is fetched or parsed until `parse` is called.
    def initialize(id : String, response : String | Nil = nil)
      @id = id
      @url = "https://muse.jhu.edu/issue/#{id}"
      @articles = [] of Muse::Dl::Article
      # Initialise the info box *before* parsing: `parse(html)` assigns
      # `@info`, so resetting it afterwards would discard the parsed values.
      @info = Hash(String, String).new
      parse(response) if response
    end

    # Returns true only when the info box has an "Open Access" entry whose
    # value is exactly "Yes"; a missing entry or any other value gives false.
    def open_access
      @info["Open Access"]? == "Yes"
    end

    # Downloads the issue page from `url` via Crest and parses it.
    def parse
      html = Crest.get(@url).to_s
      parse(html)
    end

    # Parses the issue page *html*: fills the info box, title, summary and
    # publisher through `InfoParser`, then derives volume/number/date from
    # the title and collects the articles listed on the page.
    def parse(html : String)
      h = Myhtml::Parser.new html
      @info = InfoParser.infobox(h)
      @title = InfoParser.issue_title(h)
      @summary = InfoParser.summary(h)
      @publisher = InfoParser.journal_publisher(h)
      parse_title
      parse_contents(h)
    end

    # Extracts `volume`, `number` and `date` from the issue title.
    # Does nothing when no title has been set.
    def parse_title
      t = @title
      return if t.nil?

      @volume = /Volume (\d+)/.match(t).try &.[1]

      # "Number N" takes precedence; "Issue N" is only consulted as a fallback.
      @number = /Number (\d+)/.match(t).try &.[1]
      @number = /Issue (\d+)/.match(t).try &.[1] unless @number

      # Prefer "<month or season> <digits>"; otherwise fall back to the first
      # four-digit run found in the title.
      @date = /((January|February|March|April|May|June|July|August|September|October|November|December|Spring|Winter|Fall|Summer) (\d+))/.match(t).try &.[1]
      @date = /(\d{4})/.match(t).try &.[1] unless @date
    end

    # Walks the parsed page *myhtml* and appends one `Article` to `articles`
    # for every link of the form `/article/<digits>/pdf` found inside an
    # `.articles_list_text ol` entry. Also fills `journal_title` from the
    # page banner when it has not been set already.
    def parse_contents(myhtml : Myhtml::Parser)
      unless @journal_title
        # `first?` gives nil when the banner link is absent, so a page without
        # a banner leaves the title unset instead of raising.
        if banner_link = myhtml.css("#journal_banner_title a").first?
          @journal_title = banner_link.inner_text
        end
      end

      myhtml.css(".articles_list_text ol").each do |ol|
        # NOTE(review): `first` raises when an entry has no title link;
        # kept as in the original so malformed entries are not silently skipped.
        link = ol.css("li.title a").first
        title = link.inner_text

        # Reset for each entry so page numbers never carry over between entries.
        start_page = nil
        end_page = nil
        if page_node = ol.css("li.pg").first?
          # Page ranges are expected as "<start>-<end>"; anything else is ignored.
          if page_range = /(\d+)-(\d+)/.match(page_node.inner_text)
            start_page = page_range[1].to_i
            end_page = page_range[2].to_i
          end
        end

        ol.css("a").each do |l|
          href = l.attribute_by("href").to_s
          if pdf_match = /\/article\/(\d+)\/pdf/.match(href)
            a = Muse::Dl::Article.new pdf_match[1]
            a.title = title
            a.start_page = start_page if start_page
            a.end_page = end_page if end_page
            @articles.push a
          end
        end
      end
    end
  end
end
|