require "./spec_helper"
describe Myhtml do
it "direct use CssFilter" do
  # Builds a CssFilter by hand and runs it from the parser's html! node.
  # The assertions pin the result to exactly the <p> elements p1 and p5.
  markup = "<div><p id=p1><p id=p2><p id=p3><a>link</a><p id=p4><p id=p5><p id=p6></div>"
  rule = "div > :nth-child(2n+1):not(:has(a))"

  document = Myhtml::Parser.new(markup)
  filter = Myhtml::CssFilter.new(rule)
  matched = filter.search_from(document.html!).to_a

  matched.size.should eq 2

  first_hit = matched[0]
  second_hit = matched[1]

  first_hit.tag_name.should eq "p"
  first_hit.attribute_by("id").should eq "p1"

  second_hit.tag_name.should eq "p"
  second_hit.attribute_by("id").should eq "p5"
end
it "css for root! node" do
  # Same selector as the direct CssFilter example, but issued through
  # the `css` helper of the root! node with the rule given as a string.
  markup = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
  document = Myhtml::Parser.new(markup)

  matched = document.root!.css("div > :nth-child(2n+1):not(:has(a))").to_a
  matched.size.should eq 2

  first_hit = matched[0]
  second_hit = matched[1]

  first_hit.tag_name.should eq "p"
  first_hit.attribute_by("id").should eq "p1"

  second_hit.tag_name.should eq "p"
  second_hit.attribute_by("id").should eq "p5"
end
it "another rule" do
  # A plain class selector run from root! must yield the three `jo` paragraphs.
  markup = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
  document = Myhtml::Parser.new(markup)

  jo_ids = document.root!.css(".jo").to_a.map(&.attribute_by("id"))
  jo_ids.should eq ["p2", "p4", "p6"]
end
it "another rule for parser itself" do
  # Same class selector, but called on the parser object instead of a node.
  markup = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
  document = Myhtml::Parser.new(markup)

  jo_ids = document.css(".jo").to_a.map(&.attribute_by("id"))
  jo_ids.should eq ["p2", "p4", "p6"]
end
it "work for another scope node" do
  # `css` is called on two different <div> nodes of the same document and
  # the expected id lists differ depending on the node the search starts from.
  markup = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><div id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></div></div>"
  document = Myhtml::Parser.new(markup)

  last_div = document.nodes(:div).to_a.last
  last_div.css(".jo").to_a.map(&.attribute_by("id")).should eq ["p4", "p6"]

  first_div = document.nodes(:div).to_a.first
  first_div.css(".jo").to_a.map(&.attribute_by("id")).should eq ["p2", "p4", "p6"]
end
context "build finder" do
  # Each example builds one CssFilter up front and passes that same instance
  # to `css` ten times in a row, expecting the same ids on every pass.
  html = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"

  # Previously also titled "for parser", which clashed with the next example
  # and did not match the code: the search starts from root!, not the parser.
  it "for root node, then inspect" do
    parser = Myhtml::Parser.new(html)
    finder = Myhtml::CssFilter.new(".jo")

    10.times do
      parser.root!.css(finder).to_a.map(&.attribute_by("id")).should eq %w(p2 p4 p6)
    end

    # After being used, the filter's inspect output must show its rule.
    finder.inspect.should eq "Myhtml::CssFilter(rule: `.jo`)"
  end

  it "for parser" do
    parser = Myhtml::Parser.new(html)
    finder = Myhtml::CssFilter.new(".jo")

    10.times do
      parser.css(finder).to_a.map(&.attribute_by("id")).should eq %w(p2 p4 p6)
    end
  end

  it "for root node" do
    parser = Myhtml::Parser.new(html)
    finder = Myhtml::CssFilter.new(".jo")

    10.times do
      parser.root!.css(finder).to_a.map(&.attribute_by("id")).should eq %w(p2 p4 p6)
    end
  end
end
it "should not raise on empty selector" do
  # A filter built from an empty rule must be usable and match nothing.
  markup = "<div><p id=p1><p id=p2 class=jo><p id=p3><a>link</a><span id=bla><p id=p4 class=jo><p id=p5 class=bu><p id=p6 class=jo></span></div>"
  document = Myhtml::Parser.new(markup)
  blank_filter = Myhtml::CssFilter.new("")

  matched = document.css(blank_filter).to_a
  matched.size.should eq 0
end
it "integration test" do
  # The heredoc body and its terminator stay at column 0 so the parsed
  # document text is exactly the same as before.
  page = <<-PAGE
<div>
<p id=p1>
<p id=p2 class=jo>
<p id=p3>
<a href="some.html" id=a1>link1</a>
<a href="some.png" id=a2>link2</a>
<div id=bla>
<p id=p4 class=jo>
<p id=p5 class=bu>
<p id=p6 class=jo>
</div>
</div>
PAGE
  document = Myhtml::Parser.new(page)

  # Selector (run against the whole document) => ids expected, in order.
  expected_ids = {
    "p[id*=p]"                                                      => %w(p1 p2 p3 p4 p5 p6),
    "p.jo"                                                          => %w(p2 p4 p6),
    ".jo"                                                           => %w(p2 p4 p6),
    "div > :nth-child(2n+1):not(:has(a))"                           => %w(p1 p4 p6),
    %q{a[href$=".png"]}                                             => %w(a2),
    %q{p[id=p3] > a[href*="html"]}                                  => %w(a1),
    %q{p[id=p3] > a:matches([href *= "html"], [href $= ".png"])}    => %w(a1 a2),
  }

  expected_ids.each do |rule, ids|
    document.css(rule).map(&.attribute_by("id")).to_a.should eq ids
  end

  # Scoped search: start from the last <div> returned by the document search.
  last_div = document.css("div").to_a.last
  last_div.css(".jo").map(&.attribute_by("id")).to_a.should eq %w(p4 p6)

  # A prebuilt CssFilter is accepted in place of a rule string.
  jo_filter = Myhtml::CssFilter.new(".jo")
  document.css(jo_filter).map(&.attribute_by("id")).to_a.should eq %w(p2 p4 p6)
end
it "integration test2" do
  # The heredoc body and its terminator stay at column 0 so the parsed
  # document text is exactly the same as before.
  page = <<-PAGE
<html><body>
<table id="t1"><tbody>
<tr><td>Hello</td></tr>
</tbody></table>
<table id="t2"><tbody>
<tr><td>123</td><td>other</td></tr>
<tr><td>foo</td><td>columns</td></tr>
<tr><td>bar</td><td>are</td></tr>
<tr><td>xyz</td><td>ignored</td></tr>
</tbody></table>
</body></html>
PAGE
  document = Myhtml::Parser.new(page)
  first_cells = "#t2 tr td:first-child"

  # First column of the second table, as text and as serialized HTML.
  document.css(first_cells).map(&.inner_text).to_a.should eq ["123", "foo", "bar", "xyz"]
  document.css(first_cells).map(&.to_html).to_a.should eq ["<td>123</td>", "<td>foo</td>", "<td>bar</td>", "<td>xyz</td>"]

  # Same cells again, this time via a nested search started from each row.
  row_heads = document.css("#t2 tr").map do |row|
    row.css("td:first-child").first.inner_text
  end
  row_heads.to_a.join('|').should eq "123|foo|bar|xyz"
end
it "not segfaulting on more than 1024 elements" do
  # Regression guard: a search with far more than 1024 matches must be
  # iterated to the end, and every yielded node must still be readable.
  total = 20_000
  markup = "<html>" + "<div class=A>ooo</div>" * total + "</html>"
  parser = Myhtml::Parser.new(markup)

  visited = 0
  with_class_a = 0
  parser.css("div").each do |node|
    visited += 1
    with_class_a += 1 if node.attribute_by("class") == "A"
  end

  visited.should eq total
  with_class_a.should eq total
end
it "bug in css" do
  # The class value "asfjjjj" only ends with "jjjj"; `.jjjj` must not match it.
  document = Myhtml::Parser.new(%q{<div class="asfjjjj">bla</div>})

  matched = document.css("div.jjjj").to_a
  matched.size.should eq 0
end
it "css with yield" do
  # Block form of `css`: the value computed inside the block is what the
  # call evaluates to, which is what the assertion checks.
  document = Myhtml::Parser.new(%q{<div class="jjjj">bla</div>})

  match_count = document.css("div.jjjj") do |collection|
    collection.to_a.size
  end

  match_count.should eq 1
end
end