mirror of https://github.com/captn3m0/muse-dl.git
Initial work on parsing the journal page
This commit is contained in:
parent
fcc4f0c48b
commit
d8702b2fcb
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,23 @@
|
|||
require "./spec_helper"
|
||||
|
||||
# Specs for Muse::Dl::Journal, driven by a saved copy of the journal 159 page.
describe Muse::Dl::Journal do
  # File.read returns the whole file and closes the handle itself, whereas
  # File.new(...).gets_to_end left the descriptor open until finalization.
  html = File.read("spec/fixtures/journal-159.html")
  # The fixture is parsed once; every example below reads from the same object.
  j = Muse::Dl::Journal.new html

  it "should parse the infobox for 159" do
    j.info["ISSN"].should eq "1530-7131"
    j.info["Print ISSN"].should eq "1531-2542"
    j.info["Coverage Statement"].should eq "Vol. 1 (2001) through current issue"
    j.info["Open Access"].should eq "No"
  end

  it "should parse summary" do
    # The heredoc body is indented to the closing delimiter, so the expected
    # string carries no leading whitespace.
    j.summary.should eq <<-EOT
    Focusing on important research about the role of academic libraries and librarianship, portal also features commentary on issues in technology and publishing. Written for all those interested in the role of libraries within the academy, portal includes peer-reviewed articles addressing subjects such as library administration, information technology, and information policy. In its inaugural year, portal earned recognition as the runner-up for best new journal, awarded by the Council of Editors of Learned Journals (CELJ). An article in portal, "Master's and Doctoral Thesis Citations: Analysis and Trends of a Longitudinal Study," won the Jesse H. Shera Award for Distinguished Published Research from the Library Research Round Table of the American Library Association.
    EOT
  end

  it "should parse publisher" do
    j.publisher.should eq "Johns Hopkins University Press"
  end
end
|
|
@ -98,7 +98,7 @@ module Muse::Dl
|
|||
puts "Downloaded #{chapter_id}"
|
||||
end
|
||||
|
||||
def self.get_info(url : String) : Muse::Dl::Thing | Nil
|
||||
def self.get_info(url : String)
|
||||
match = /https:\/\/muse.jhu.edu\/(book|journal)\/(\d+)/.match url
|
||||
if match
|
||||
begin
|
||||
|
|
|
@ -50,9 +50,13 @@ module Muse::Dl
|
|||
myhtml.css("#book_about_info .pub a").map(&.inner_text).to_a[0].strip
|
||||
end
|
||||
|
||||
# Returns the publisher name shown on a journal page: the stripped text of
# the first node matching `.card_publisher a`.
# Indexing the collected texts raises when the selector matches nothing.
def self.journal_publisher(myhtml : Myhtml::Parser)
  link_texts = myhtml.css(".card_publisher a").map(&.inner_text).to_a
  link_texts[0].strip
end
|
||||
|
||||
def self.summary(myhtml : Myhtml::Parser)
|
||||
begin
|
||||
return myhtml.css("#book_about_info .card_summary").map(&.inner_text).to_a[0].strip
|
||||
return myhtml.css(".card_summary").map(&.inner_text).to_a[0].strip
|
||||
rescue e : Exception
|
||||
STDERR.puts "Could not fetch summary"
|
||||
return "NA"
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
require "./thing.cr"
|
||||
|
||||
module Muse::Dl
  # Issue declares no members of its own; all behaviour is inherited from
  # Muse::Dl::Thing.
  # NOTE(review): presumably models a single journal issue — confirm.
  class Issue < Muse::Dl::Thing; end
end
|
|
@ -1,6 +1,20 @@
|
|||
require "./thing.cr"
|
||||
require "./infoparser.cr"
|
||||
require "myhtml"
|
||||
|
||||
module Muse::Dl
  # Parsed view of a journal page.
  #
  # The constructor parses the supplied HTML with Myhtml and fills in the
  # infobox, summary and publisher through InfoParser. All three are exposed
  # as read-only getters.
  class Journal
    getter :info, :summary, :publisher
    # Declared by type only: initialize always assigns these, so no
    # placeholder value is allocated up front.
    @info : Hash(String, String)
    @summary : String
    @publisher : String

    # The parsed document is only reachable from inside the class.
    private getter :h

    # html - markup of the journal page, passed straight to Myhtml::Parser.new.
    def initialize(html)
      @h = Myhtml::Parser.new html
      @info = InfoParser.infobox(h)
      @summary = InfoParser.summary(h)
      @publisher = InfoParser.journal_publisher(h)
    end
  end
end
|
||||
|
|
Loading…
Reference in New Issue