Adds webmock

This commit is contained in:
Nemo 2020-04-08 01:48:48 +05:30
parent ff225b12c6
commit f11f64b9d5
8 changed files with 2038 additions and 2 deletions

View File

@ -12,3 +12,7 @@ shards:
github: kostya/myhtml
version: 1.5.1
webmock:
github: manastech/webmock.cr
commit: 78bb0e3b5850c700da0e7fbdd2d6c180cc4a061b

View File

@ -16,3 +16,8 @@ dependencies:
github: kostya/myhtml
crest:
github: mamantoha/crest
development_dependencies:
webmock:
github: manastech/webmock.cr
branch: master

View File

@ -2,6 +2,10 @@ require "./spec_helper"
# require "errors/muse_corrupt_pdf.cr"
describe Muse::Dl::Book do
headers = {"Content-Type" => "text/html"}
WebMock.stub(:get, "https://muse.jhu.edu/chapter/2379787/pdf")
.to_return(body_io: File.new("spec/fixtures/chapter-2379787.html"), headers: headers)
it "should notice the unable to construct chapter PDF error" do
f = "/tmp/chapter-2379787.pdf"
File.delete(f) if File.exists? f

359
spec/fixtures/chapter-2379787.html vendored Normal file
View File

@ -0,0 +1,359 @@
<style>
.page404 {
display: table;
width: 100%;
padding: 60px 4em;
min-height: 350px;
}
.page404 .int {
display: table-cell;
vertical-align: middle;
text-align: left;
}
.page404 h4 {
margin-bottom: 10px;
font-weight: 700;
}
.page404 .logo {
display: table-cell;
width: 23%;
vertical-align: middle;
padding-right: 30px;
}
.page404 blockquote {
border: none;
padding-left: 0;
}
</style>
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-58347753-2"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-58347753-2');
</script>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta property="og:image" content="/images/muselogo_dark.jpg" />
<title>Project MUSE</title>
<link rel="search" type="application/opensearchdescription+xml" title="Search Project MUSE from your browser's Searchbar" href="/plugins/muse-opensearch.xml" />
<link rel="stylesheet" type="text/css" href="/css/normalize.css"/>
<link href="/css/jquery.qtip2.css" rel="stylesheet" type="text/css" />
<!-- foundation 6.4.1 custom float/typ/vis 250rem max width 30col float grid -->
<link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,400i,600,600i,700,700i" rel="stylesheet">
<link rel="stylesheet" type="text/css" href="/css/foundation.min.css"/>
<link rel="stylesheet" type="text/css" href="/css/style_home2.css?031820"/>
<script type="text/javascript" src="/js/jquery3.js"></script>
<script type="text/javascript" src="/js/pre.js"></script>
<script type="text/javascript" src="/js/core/head.js?new"></script>
<script type="text/javascript" src="https://s7.addthis.com/js/250/addthis_widget.js#pubid=ra-4ecb5479089cb81a"></script>
<title>Article</title>
</head>
<body>
<a id="skip" href="#skip_target">[Skip to Content]</a>
<span id="top"></span>
<div id="header" role="banner" aria-label="header">
<div class="row wrap" id="institution_banner">
<div class="content">
<div id="institution_wrap" class="columns small-15 medium-text-left">
<div id="institution" class="img_text_col">
<div class="img_contain_left"><img src="/images/institution.png" alt="institution icon" /></div>
<div class="text_contain_left"><span class="small"><a href='/account' class='color_white login_status'>Institutional Login</a></span></div>
</div>
</div>
<div id="person_wrap" class="columns small-15">
<div id="person" class="img_text_col">
<div class="img_contain_right"><img src="/images/person.png" alt="account icon" /></div>
<div class="text_contain_right"><span class="small"><a href="/account/" class="color_white login_status" onclick="gtag('event', 'click', {'event_category': 'Account link', 'event_label': 'account name link - header'});">LOG IN</a></span></div>
</div>
</div>
</div>
</div>
<div class="row wrap" id="search_banner">
<div class="content">
<div class="medium-4 small-4 columns" id="header_logo_wrap">
<div id="header_logo">
<a href="/"><img src="/images/muselogo.png" alt="Project MUSE" class="show-for-large"/>
<img src="/images/muselogo_notext.png" alt="Project MUSE" class="hide-for-large"/></a>
</div>
</div>
<div class="medium-21 small-22 columns" id="search_bar_wrap">
<div class="row">
<div id="browse_button_wrap">
<a id="browse_button" href="/browse" onclick="gtag('event', 'click', {'event_category': 'Browse link', 'event_label': 'browse button - header'});"><span class="small">browse</span></a>
</div>
<div id="or_text_wrap" class="show-for-medium">
<div id="or_text">
<span class="small">or</span>
</div>
</div>
<div id="search_input_wrap" class="small-30">
<div id="search_input">
<noscript>
<form method="post" action="/search/">
<input name="no_js_header_query"/>
<input type="hidden" name="action" value="search"/>
<input type="hidden" name="t" value="header"/>
<a id="search_button">
<input type="image" src="/images/search_white.png" alt="Search icon"/>
</a>
</form>
</noscript>
<script>document.write('<input name="search_input_header" id="search_input_header" aria-label="search input"/>');</script>
<script>document.write('<a id="search_button"><img src="/images/search_white.png" alt="Search icon"/></a>');</script>
</div>
</div>
</div>
</div>
<div class="medium-5 small-4 columns" id="menu_wrap">
<div id="menu" class="menu-btn">
<div class="nav-toggle">
<div class="nav-toggle-btn">
<a href="#" class="menu-icon-wrap">
<span class="icon"></span>
<span class="small show-for-large">menu</span>
</a>
</div>
<div class="nav-mobile">
<a href="/search">Advanced Search</a>
<a href="/browse">Browse</a>
<script>
document.write('<div class="accordion">');
</script>
<noscript>
<div class="accordion noscript">
</noscript>
<a href="#" class="acc_trig open"><span>MyMUSE Account</span></a>
<div class="acc_block">
<a href="/account">Log In / Sign Up</a>
<a href="/account/change">Change My Account</a>
<a href="/account/user_settings">User Settings</a>
<a href="/account/">Access via Institution</a>
<a href="/account/saved_items">MyMUSE Library</a>
<a href="/account/search_history">Search History</a>
<a href="/account/view_history">View History</a>
<a href="/account/purchase_history">Purchase History</a>
<a href="/account/alerts">MyMUSE Alerts</a>
</div>
</div>
<div class="nav-mobile-footer">
<!--<a class="modal_trigger">Contact Support</a>-->
<a href="/contact">Contact Support</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="page404" id="main">
<div class="logo">
<img src="/images/muselogo_notext.png" alt="MUSE logo">
</div>
<div class="int">
<html><head><title>Error</title></head><body>Unable to construct chapter PDF</body></html>
</div>
</div>
<div id="footer_block" role="banner" aria-label="footer">
<div class="content">
<div class="wrap row" id="about_wrap">
<div id="about">
<h3>Project MUSE Mission</h3>
<p>Project MUSE promotes the creation and dissemination of essential humanities and social science resources through collaboration with libraries, publishers, and scholars worldwide. Forged from a partnership between a university press and a library, Project MUSE is a trusted part of the academic and scholarly community it serves.</p>
</div>
<div id="about_logo" class="columns medium-10 show-for-large">
<img src="/images/muselogo_notext.png" alt="MUSE logo"/>
</div>
</div>
</div>
<div class="footer_main">
<div class="footer_item_color wrap">
<div class="footer_item_left">
<div class="group">
<div class="footer_item_about cont_sub">
<h5 class="small">about</h5>
<ul>
<li><a href="https://about.muse.jhu.edu/publishers">Publishers</a></li>
<li><a href="https://about.muse.jhu.edu/about/discovery-partners/">Discovery Partners</a></li>
<li><a href="https://about.muse.jhu.edu/about/advisory-board/">Advisory Board</a></li>
<li><a href="https://about.muse.jhu.edu/about/journal-subscribers/">Journal Subscribers</a></li>
<li><a href="https://about.muse.jhu.edu/about/book-customers">Book Customers</a></li>
<li><a href="https://about.muse.jhu.edu/about/at-conferences/">Conferences</a></li>
</ul>
</div>
<div class="footer_item_res cont_sub">
<h5 class="small">resources</h5>
<ul>
<li><a href="https://about.muse.jhu.edu/resources/news/">News & Announcements</a></li>
<li><a href="https://about.muse.jhu.edu/resources/promotional-materials">Promotional Material</a></li>
<li><a href="https://about.muse.jhu.edu/resources/alerts">Get Alerts</a></li>
<li><a href="https://about.muse.jhu.edu/resources/muse-presentations">Presentations</a></li>
</ul>
</div>
<div class="clear"></div>
</div>
<div class="group">
<div class="footer_item_what cont_sub">
<h5 class="small">what's on muse</h5>
<ul>
<li><a href="https://about.muse.jhu.edu/muse">Open Access</a></li>
<li><a href="https://about.muse.jhu.edu/pub/journals">Journals</a></li>
<li><a href="https://about.muse.jhu.edu/pub/books">Books</a></li>
</ul>
</div>
<div class="footer_item_info cont_sub">
<h5 class="small">information for</h5>
<ul>
<li><a href="https://about.muse.jhu.edu/publishers">Publishers</a></li>
<li><a href="https://about.muse.jhu.edu/librarians">Librarians</a></li>
<li><a href="https://about.muse.jhu.edu/individuals">Individuals</a></li>
</ul>
</div>
<div class="clear"></div>
</div>
</div>
<div class="footer_item_right">
<div class="group">
<div class="footer_item_social cont_sub">
<h5 class="small">Contact</h5>
<ul>
<li class="clear"><a href="/contact">Contact Us</a></li>
<li><a href="https://about.muse.jhu.edu/resources/help-overview">Help</a></li>
</ul>
<ul>
<li>
<ol class="social_icons">
<li class="list_h"><a href="https://www.facebook.com/ProjectMUSE"><img src="/images/footer_icon_fb.png" alt="Facebook" /></a></li>
<li class="list_h"><a href="https://www.linkedin.com/company/projectmuse/"><img src="/images/footer_icon_linkedin.png" alt="Linkedin" /></a></li>
<li class="list_h"><a href="https://twitter.com/ProjectMUSE"><img src="/images/footer_icon_twitter.png" alt="Twitter" /></a></li>
</ol>
</li>
</ul>
</div>
<div class="footer_item_policy cont_sub">
<h5 class="small">Policy & Terms</h5>
<ul>
<li><a href="https://about.muse.jhu.edu/about/accessibility/">Accessibility</a></li>
<li><a href="/privacy_policy">Privacy Policy</a></li>
<li><a href="/terms_use">Terms of Use</a></li>
</ul>
</div>
<div class="clear"></div>
</div>
<div class="group">
<div class="footer_item_addr cont_sub">
<p class="address"><span>2715 North Charles Street<br/>Baltimore, Maryland, USA 21218</span></p>
<p class="phone"><span><a href="tel:1-410-516-6989">+1 (410) 516-6989</a></span><br>
<span><a href="mailto:muse@press.jhu.edu">muse@press.jhu.edu</a></span></p>
<p class="footer_text_sm copy color_oxfordblue hide-for-small"><span>&copy;2020 Project MUSE. Produced by Johns Hopkins University Press in collaboration with The Sheridan Libraries.</span></p>
</div>
<div class="footer_item_logo cont_sub">
<p class="show-for-medium"><span class="semiboldit footer_text_sm">Now and always,<br/>The Trusted Content Your Research Requires.</span></p>
<p><span><a href="https://muse.jhu.edu">
<img class="show-for-medium" src="/images/muselogoblack.png" alt="Project MUSE logo" />
<img class="hide-for-medium" src="/images/muselogo.png" alt="Project MUSE logo" /></a></span></p>
<p class="hide-for-medium"><span class="semiboldit footer_text_sm">Now and always, The Trusted Content Your Research Requires.</span></p>
<p class="hide-for-small"><span class="footer_text_sm">Built on the Johns Hopkins University Campus</span></p>
</div>
<div class="clear"></div>
</div>
</div>
<div class="clear"></div>
</div>
</div>
<div class="footer_item_sub wrap hide-for-medium">
<p><span class="footer_text_sm">Built on the Johns Hopkins University Campus</span></p>
<p class="footer_text_sm copy color_oxfordblue"><span>&copy;2020 Project MUSE. Produced by Johns Hopkins University Press in collaboration with The Sheridan Libraries.</span></p>
</div>
</div>
<div id="btn_top">
<a href="#top"><span>Back To Top</span></a>
</div>
<input type="hidden" name="cookie_acknowledgement_type" id="cookie_acknowledgement_type" value="cookie_acknowledgement">
<div id="cookies_msg">
<p>This website uses cookies to ensure you get the best experience on our website. Without cookies your experience may not be seamless.</p>
<script>document.writeln('<a href="javascript://" class="btn_accept" id="accept_cookie_msg">Accept</a>');</script>
<noscript>
<form method="post" action="/account/set_attribute_no_ajax/cookie_acknowledgement/1">
<input type="submit" class="btn_accept" value="accept">
</form>
</noscript>
</div>
<script type="text/javascript" src="/js/lightbox.js"></script>
<script type="text/javascript" src="/js/jquery.qtip2.min.js"></script>
<script type="text/javascript" src="/js/post.js"></script>
<script type="text/javascript" src="/js/footnotes.js"></script>
<script type="text/javascript" src="/js/references.js"></script>
</body>
</html>

1603
spec/fixtures/issue-41793.html vendored Normal file

File diff suppressed because it is too large Load Diff

35
spec/issue_spec.cr Normal file
View File

@ -0,0 +1,35 @@
require "../src/issue"
require "./spec_helper"
require "webmock"
describe Muse::Dl::Issue do
  # Answer GET requests for the issue page with the saved fixture.
  # File.read opens, reads and closes the fixture in a single call, so no
  # File handle is left open for the lifetime of the spec run.
  WebMock.stub(:get, "https://muse.jhu.edu/issue/41793")
    .to_return(body: File.read("spec/fixtures/issue-41793.html"))

  # One Issue instance is built and parsed once, then shared by the examples below.
  issue = Muse::Dl::Issue.new "41793"
  issue.parse

  it "should initialize correctly" do
    issue.id.should eq "41793"
    issue.url.should eq "https://muse.jhu.edu/issue/41793"
  end

  # The remaining examples are commented out.
  # NOTE(review): presumably waiting on parser support for issue pages - confirm before enabling.

  # it "should parse info correctly" do
  #   issue.info["ISSN"].should eq "1530-7131"
  #   issue.info["Print ISSN"].should eq "1531-2542"
  #   issue.info["Launched on MUSE"].should eq "2020-02-05"
  #   issue.info["Open Access"].should eq "No"
  #   issue.title.should eq "Volume 20, Number 1, January 2020"
  # end

  # it "should parse summary" do
  #   issue.summary.should eq <<-EOT
  #   Focusing on important research about the role of academic libraries and librarianship, portal also features commentary on issues in technology and publishing. Written for all those interested in the role of libraries within the academy, portal includes peer-reviewed articles addressing subjects such as library administration, information technology, and information policy. In its inaugural year, portal earned recognition as the runner-up for best new journal, awarded by the Council of Editors of Learned Journals (CELJ). An article in portal, "Master's and Doctoral Thesis Citations: Analysis and Trends of a Longitudinal Study," won the Jesse H. Shera Award for Distinguished Published Research from the Library Research Round Table of the American Library Association.
  #   EOT
  # end

  # it "should parse publisher" do
  #   issue.publisher.should eq "Johns Hopkins University Press"
  # end
end

7
src/article.cr Normal file
View File

@ -0,0 +1,7 @@
require "./infoparser.cr"
require "./issue.cr"
module Muse::Dl
  # Placeholder type for an article.
  #
  # The class currently declares no state and no behavior; it exists so that
  # other code can refer to `Muse::Dl::Article` as a type.
  class Article
  end
end

View File

@ -1,13 +1,32 @@
require "./thing.cr"
require "./fetch.cr"
require "./article.cr"
module Muse::Dl
  # An issue page on muse.jhu.edu, addressed by its id.
  #
  # A new instance only knows its id and URL. Call `#parse` to fetch the page
  # and fill in `info`, `summary` and `publisher`.
  class Issue
    @id : String
    @title : String | Nil
    @articles : Array(Muse::Dl::Article)
    @url : String
    @info : Hash(String, String) | Nil
    @summary : String | Nil
    @publisher : String | Nil

    # A single getter declaration covers every exposed field, including `id`
    # (a separate `getter :id` would be redundant).
    getter :id, :title, :articles, :url, :summary, :publisher, :info

    # Builds an issue for the given *id*.
    #
    # The URL is derived from the id. The title starts as the placeholder
    # "NA" and the article list starts empty; `info`, `summary` and
    # `publisher` stay nil until `#parse` is called.
    def initialize(id : String)
      @id = id
      @url = "https://muse.jhu.edu/issue/#{id}"
      @title = "NA"
      @articles = [] of Muse::Dl::Article
    end

    # Fetches the issue page over HTTP and extracts its metadata.
    #
    # Sets `info`, `summary` and `publisher` from the parsed HTML. It does not
    # update `title` or `articles`. Any error raised by the HTTP request or
    # the parser is propagated to the caller.
    def parse
      html = Crest.get(url).to_s
      h = Myhtml::Parser.new html
      @info = InfoParser.infobox(h)
      @summary = InfoParser.summary(h)
      @publisher = InfoParser.journal_publisher(h)
    end
  end
end