author | Pawel Solyga <Pawel.Solyga@gmail.com> |
Thu, 04 Jun 2009 20:55:30 +0200 | |
changeset 2390 | 723dfa4811e8 |
parent 2323 | b3daada52dd3 |
permissions | -rw-r--r-- |
2323
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
1 |
# -*- coding: utf-8 -*- |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
2 |
"""Unit tests for Beautiful Soup. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
3 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
4 |
These tests make sure the Beautiful Soup works as it should. If you |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
5 |
find a bug in Beautiful Soup, the best way to express it is as a test |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
6 |
case like this that fails.""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
7 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
8 |
import unittest |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
9 |
from BeautifulSoup import * |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
10 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
11 |
class SoupTest(unittest.TestCase): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
12 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
13 |
def assertSoupEquals(self, toParse, rep=None, c=BeautifulSoup, |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
14 |
encoding=None): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
15 |
"""Parse the given text and make sure its string rep is the other |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
16 |
given text.""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
17 |
if rep == None: |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
18 |
rep = toParse |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
19 |
obj = c(toParse) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
20 |
if encoding is None: |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
21 |
rep2 = obj.decode() |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
22 |
else: |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
23 |
rep2 = obj.encode(encoding) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
24 |
self.assertEqual(rep2, rep) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
25 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
26 |
class FollowThatTag(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
27 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
28 |
"Tests the various ways of fetching tags from a soup." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
29 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
30 |
def setUp(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
31 |
ml = """ |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
32 |
<a id="x">1</a> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
33 |
<A id="a">2</a> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
34 |
<b id="b">3</a> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
35 |
<b href="foo" id="x">4</a> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
36 |
<ac width=100>4</ac>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
37 |
self.soup = BeautifulStoneSoup(ml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
38 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
39 |
def testFindAllByName(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
40 |
matching = self.soup('a') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
41 |
self.assertEqual(len(matching), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
42 |
self.assertEqual(matching[0].name, 'a') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
43 |
self.assertEqual(matching, self.soup.findAll('a')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
44 |
self.assertEqual(matching, self.soup.findAll(SoupStrainer('a'))) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
45 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
46 |
def testFindAllByAttribute(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
47 |
matching = self.soup.findAll(id='x') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
48 |
self.assertEqual(len(matching), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
49 |
self.assertEqual(matching[0].name, 'a') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
50 |
self.assertEqual(matching[1].name, 'b') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
51 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
52 |
matching2 = self.soup.findAll(attrs={'id' : 'x'}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
53 |
self.assertEqual(matching, matching2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
54 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
55 |
strainer = SoupStrainer(attrs={'id' : 'x'}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
56 |
self.assertEqual(matching, self.soup.findAll(strainer)) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
57 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
58 |
self.assertEqual(len(self.soup.findAll(id=None)), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
59 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
60 |
self.assertEqual(len(self.soup.findAll(width=100)), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
61 |
self.assertEqual(len(self.soup.findAll(junk=None)), 5) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
62 |
self.assertEqual(len(self.soup.findAll(junk=[1, None])), 5) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
63 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
64 |
self.assertEqual(len(self.soup.findAll(junk=re.compile('.*'))), 0) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
65 |
self.assertEqual(len(self.soup.findAll(junk=True)), 0) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
66 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
67 |
self.assertEqual(len(self.soup.findAll(junk=True)), 0) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
68 |
self.assertEqual(len(self.soup.findAll(href=True)), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
69 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
70 |
def testFindallByClass(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
71 |
soup = BeautifulSoup('<a>Foo</a><a class="1">Bar</a>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
72 |
self.assertEqual(soup.find('a', '1').string, "Bar") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
73 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
74 |
def testFindAllByList(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
75 |
matching = self.soup(['a', 'ac']) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
76 |
self.assertEqual(len(matching), 3) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
77 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
78 |
def testFindAllByHash(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
79 |
matching = self.soup({'a' : True, 'b' : True}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
80 |
self.assertEqual(len(matching), 4) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
81 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
82 |
def testFindAllText(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
83 |
soup = BeautifulSoup("<html>\xbb</html>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
84 |
self.assertEqual(soup.findAll(text=re.compile('.*')), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
85 |
[u'\xbb']) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
86 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
87 |
def testFindAllByRE(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
88 |
import re |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
89 |
r = re.compile('a.*') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
90 |
self.assertEqual(len(self.soup(r)), 3) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
91 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
92 |
def testFindAllByMethod(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
93 |
def matchTagWhereIDMatchesName(tag): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
94 |
return tag.name == tag.get('id') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
95 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
96 |
matching = self.soup.findAll(matchTagWhereIDMatchesName) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
97 |
self.assertEqual(len(matching), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
98 |
self.assertEqual(matching[0].name, 'a') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
99 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
100 |
def testParents(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
101 |
soup = BeautifulSoup('<ul id="foo"></ul><ul id="foo"><ul><ul id="foo" a="b"><b>Blah') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
102 |
b = soup.b |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
103 |
self.assertEquals(len(b.findParents('ul', {'id' : 'foo'})), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
104 |
self.assertEquals(b.findParent('ul')['a'], 'b') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
105 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
106 |
PROXIMITY_TEST = BeautifulSoup('<b id="1"><b id="2"><b id="3"><b id="4">') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
107 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
108 |
def testNext(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
109 |
soup = self.PROXIMITY_TEST |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
110 |
b = soup.find('b', {'id' : 2}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
111 |
self.assertEquals(b.findNext('b')['id'], '3') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
112 |
self.assertEquals(b.findNext('b')['id'], '3') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
113 |
self.assertEquals(len(b.findAllNext('b')), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
114 |
self.assertEquals(len(b.findAllNext('b', {'id' : 4})), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
115 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
116 |
def testPrevious(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
117 |
soup = self.PROXIMITY_TEST |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
118 |
b = soup.find('b', {'id' : 3}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
119 |
self.assertEquals(b.findPrevious('b')['id'], '2') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
120 |
self.assertEquals(b.findPrevious('b')['id'], '2') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
121 |
self.assertEquals(len(b.findAllPrevious('b')), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
122 |
self.assertEquals(len(b.findAllPrevious('b', {'id' : 2})), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
123 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
124 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
125 |
SIBLING_TEST = BeautifulSoup('<blockquote id="1"><blockquote id="1.1"></blockquote></blockquote><blockquote id="2"><blockquote id="2.1"></blockquote></blockquote><blockquote id="3"><blockquote id="3.1"></blockquote></blockquote><blockquote id="4">') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
126 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
127 |
def testNextSibling(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
128 |
soup = self.SIBLING_TEST |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
129 |
tag = 'blockquote' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
130 |
b = soup.find(tag, {'id' : 2}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
131 |
self.assertEquals(b.findNext(tag)['id'], '2.1') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
132 |
self.assertEquals(b.findNextSibling(tag)['id'], '3') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
133 |
self.assertEquals(b.findNextSibling(tag)['id'], '3') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
134 |
self.assertEquals(len(b.findNextSiblings(tag)), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
135 |
self.assertEquals(len(b.findNextSiblings(tag, {'id' : 4})), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
136 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
137 |
def testPreviousSibling(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
138 |
soup = self.SIBLING_TEST |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
139 |
tag = 'blockquote' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
140 |
b = soup.find(tag, {'id' : 3}) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
141 |
self.assertEquals(b.findPrevious(tag)['id'], '2.1') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
142 |
self.assertEquals(b.findPreviousSibling(tag)['id'], '2') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
143 |
self.assertEquals(b.findPreviousSibling(tag)['id'], '2') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
144 |
self.assertEquals(len(b.findPreviousSiblings(tag)), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
145 |
self.assertEquals(len(b.findPreviousSiblings(tag, id=1)), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
146 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
147 |
def testTextNavigation(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
148 |
soup = BeautifulSoup('Foo<b>Bar</b><i id="1"><b>Baz<br />Blee<hr id="1"/></b></i>Blargh') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
149 |
baz = soup.find(text='Baz') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
150 |
self.assertEquals(baz.findParent("i")['id'], '1') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
151 |
self.assertEquals(baz.findNext(text='Blee'), 'Blee') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
152 |
self.assertEquals(baz.findNextSibling(text='Blee'), 'Blee') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
153 |
self.assertEquals(baz.findNextSibling(text='Blargh'), None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
154 |
self.assertEquals(baz.findNextSibling('hr')['id'], '1') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
155 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
156 |
class SiblingRivalry(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
157 |
"Tests the nextSibling and previousSibling navigation." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
158 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
159 |
def testSiblings(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
160 |
soup = BeautifulSoup("<ul><li>1<p>A</p>B<li>2<li>3</ul>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
161 |
secondLI = soup.find('li').nextSibling |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
162 |
self.assert_(secondLI.name == 'li' and secondLI.string == '2') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
163 |
self.assertEquals(soup.find(text='1').nextSibling.name, 'p') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
164 |
self.assertEquals(soup.find('p').nextSibling, 'B') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
165 |
self.assertEquals(soup.find('p').nextSibling.previousSibling.nextSibling, 'B') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
166 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
167 |
class TagsAreObjectsToo(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
168 |
"Tests the various built-in functions of Tag objects." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
169 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
170 |
def testLen(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
171 |
soup = BeautifulSoup("<top>1<b>2</b>3</top>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
172 |
self.assertEquals(len(soup.top), 3) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
173 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
174 |
class StringEmUp(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
175 |
"Tests the use of 'string' as an alias for a tag's only content." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
176 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
177 |
def testString(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
178 |
s = BeautifulSoup("<b>foo</b>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
179 |
self.assertEquals(s.b.string, 'foo') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
180 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
181 |
def testLackOfString(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
182 |
s = BeautifulSoup("<b>f<i>e</i>o</b>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
183 |
self.assert_(not s.b.string) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
184 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
185 |
class ThatsMyLimit(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
186 |
"Tests the limit argument." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
187 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
188 |
def testBasicLimits(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
189 |
s = BeautifulSoup('<br id="1" /><br id="1" /><br id="1" /><br id="1" />') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
190 |
self.assertEquals(len(s.findAll('br')), 4) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
191 |
self.assertEquals(len(s.findAll('br', limit=2)), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
192 |
self.assertEquals(len(s('br', limit=2)), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
193 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
194 |
class OnlyTheLonely(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
195 |
"Tests the parseOnly argument to the constructor." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
196 |
def setUp(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
197 |
x = [] |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
198 |
for i in range(1,6): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
199 |
x.append('<a id="%s">' % i) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
200 |
for j in range(100,103): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
201 |
x.append('<b id="%s.%s">Content %s.%s</b>' % (i,j, i,j)) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
202 |
x.append('</a>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
203 |
self.x = ''.join(x) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
204 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
205 |
def testOnly(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
206 |
strainer = SoupStrainer("b") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
207 |
soup = BeautifulSoup(self.x, parseOnlyThese=strainer) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
208 |
self.assertEquals(len(soup), 15) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
209 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
210 |
strainer = SoupStrainer(id=re.compile("100.*")) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
211 |
soup = BeautifulSoup(self.x, parseOnlyThese=strainer) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
212 |
self.assertEquals(len(soup), 5) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
213 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
214 |
strainer = SoupStrainer(text=re.compile("10[01].*")) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
215 |
soup = BeautifulSoup(self.x, parseOnlyThese=strainer) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
216 |
self.assertEquals(len(soup), 10) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
217 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
218 |
strainer = SoupStrainer(text=lambda(x):x[8]=='3') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
219 |
soup = BeautifulSoup(self.x, parseOnlyThese=strainer) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
220 |
self.assertEquals(len(soup), 3) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
221 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
222 |
class PickleMeThis(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
223 |
"Testing features like pickle and deepcopy." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
224 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
225 |
def setUp(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
226 |
self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
227 |
"http://www.w3.org/TR/REC-html40/transitional.dtd"> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
228 |
<html> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
229 |
<head> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
230 |
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
231 |
<title>Beautiful Soup: We called him Tortoise because he taught us.</title> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
232 |
<link rev="made" href="mailto:leonardr@segfault.org"> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
233 |
<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping."> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
234 |
<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)"> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
235 |
<meta name="author" content="Leonard Richardson"> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
236 |
</head> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
237 |
<body> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
238 |
<a href="foo">foo</a> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
239 |
<a href="foo"><b>bar</b></a> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
240 |
</body> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
241 |
</html>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
242 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
243 |
self.soup = BeautifulSoup(self.page) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
244 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
245 |
def testPickle(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
246 |
import pickle |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
247 |
dumped = pickle.dumps(self.soup, 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
248 |
loaded = pickle.loads(dumped) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
249 |
self.assertEqual(loaded.__class__, BeautifulSoup) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
250 |
self.assertEqual(loaded.decode(), self.soup.decode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
251 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
252 |
def testDeepcopy(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
253 |
from copy import deepcopy |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
254 |
deepcopy(BeautifulSoup("<a></a>")) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
255 |
copied = deepcopy(self.soup) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
256 |
self.assertEqual(copied.decode(), self.soup.decode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
257 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
258 |
def testUnicodePickle(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
259 |
import cPickle as pickle |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
260 |
html = "<b>" + chr(0xc3) + "</b>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
261 |
soup = BeautifulSoup(html) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
262 |
dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
263 |
loaded = pickle.loads(dumped) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
264 |
self.assertEqual(loaded.decode(), soup.decode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
265 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
266 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
267 |
class WriteOnlyCode(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
268 |
"Testing the modification of the tree." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
269 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
270 |
def testModifyAttributes(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
271 |
soup = BeautifulSoup('<a id="1"></a>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
272 |
soup.a['id'] = 2 |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
273 |
self.assertEqual(soup.decode(), '<a id="2"></a>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
274 |
del(soup.a['id']) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
275 |
self.assertEqual(soup.decode(), '<a></a>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
276 |
soup.a['id2'] = 'foo' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
277 |
self.assertEqual(soup.decode(), '<a id2="foo"></a>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
278 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
279 |
def testNewTagCreation(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
280 |
"Makes sure tags don't step on each others' toes." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
281 |
soup = BeautifulSoup() |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
282 |
a = Tag(soup, 'a') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
283 |
ol = Tag(soup, 'ol') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
284 |
a['href'] = 'http://foo.com/' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
285 |
self.assertRaises(KeyError, lambda : ol['href']) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
286 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
287 |
def testTagReplacement(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
288 |
# Make sure you can replace an element with itself. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
289 |
text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
290 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
291 |
c = soup.c |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
292 |
soup.c.replaceWith(c) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
293 |
self.assertEquals(soup.decode(), text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
294 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
295 |
# A very simple case |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
296 |
soup = BeautifulSoup("<b>Argh!</b>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
297 |
soup.find(text="Argh!").replaceWith("Hooray!") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
298 |
newText = soup.find(text="Hooray!") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
299 |
b = soup.b |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
300 |
self.assertEqual(newText.previous, b) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
301 |
self.assertEqual(newText.parent, b) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
302 |
self.assertEqual(newText.previous.next, newText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
303 |
self.assertEqual(newText.next, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
304 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
305 |
# A more complex case |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
306 |
soup = BeautifulSoup("<a><b>Argh!</b><c></c><d></d></a>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
307 |
soup.b.insert(1, "Hooray!") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
308 |
newText = soup.find(text="Hooray!") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
309 |
self.assertEqual(newText.previous, "Argh!") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
310 |
self.assertEqual(newText.previous.next, newText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
311 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
312 |
self.assertEqual(newText.previousSibling, "Argh!") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
313 |
self.assertEqual(newText.previousSibling.nextSibling, newText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
314 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
315 |
self.assertEqual(newText.nextSibling, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
316 |
self.assertEqual(newText.next, soup.c) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
317 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
318 |
text = "<html>There's <b>no</b> business like <b>show</b> business</html>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
319 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
320 |
no, show = soup.findAll('b') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
321 |
show.replaceWith(no) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
322 |
self.assertEquals(soup.decode(), "<html>There's business like <b>no</b> business</html>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
323 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
324 |
# Even more complex |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
325 |
soup = BeautifulSoup("<a><b>Find</b><c>lady!</c><d></d></a>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
326 |
tag = Tag(soup, 'magictag') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
327 |
tag.insert(0, "the") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
328 |
soup.a.insert(1, tag) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
329 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
330 |
b = soup.b |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
331 |
c = soup.c |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
332 |
theText = tag.find(text=True) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
333 |
findText = b.find(text="Find") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
334 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
335 |
self.assertEqual(findText.next, tag) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
336 |
self.assertEqual(tag.previous, findText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
337 |
self.assertEqual(b.nextSibling, tag) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
338 |
self.assertEqual(tag.previousSibling, b) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
339 |
self.assertEqual(tag.nextSibling, c) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
340 |
self.assertEqual(c.previousSibling, tag) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
341 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
342 |
self.assertEqual(theText.next, c) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
343 |
self.assertEqual(c.previous, theText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
344 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
345 |
# Aand... incredibly complex. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
346 |
soup = BeautifulSoup("""<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
347 |
f = soup.f |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
348 |
a = soup.a |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
349 |
c = soup.c |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
350 |
e = soup.e |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
351 |
weText = a.find(text="We") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
352 |
soup.b.replaceWith(soup.f) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
353 |
self.assertEqual(soup.decode(), "<a>We<f>refuse</f></a><e>to<g>service</g></e>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
354 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
355 |
self.assertEqual(f.previous, weText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
356 |
self.assertEqual(weText.next, f) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
357 |
self.assertEqual(f.previousSibling, weText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
358 |
self.assertEqual(f.nextSibling, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
359 |
self.assertEqual(weText.nextSibling, f) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
360 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
361 |
def testAppend(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
362 |
doc = "<p>Don't leave me <b>here</b>.</p> <p>Don't leave me.</p>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
363 |
soup = BeautifulSoup(doc) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
364 |
second_para = soup('p')[1] |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
365 |
bold = soup.find('b') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
366 |
soup('p')[1].append(soup.find('b')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
367 |
self.assertEqual(bold.parent, second_para) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
368 |
self.assertEqual(soup.decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
369 |
"<p>Don't leave me .</p> " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
370 |
"<p>Don't leave me.<b>here</b></p>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
371 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
372 |
def testTagExtraction(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
373 |
# A very simple case |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
374 |
text = '<html><div id="nav">Nav crap</div>Real content here.</html>' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
375 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
376 |
extracted = soup.find("div", id="nav").extract() |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
377 |
self.assertEqual(soup.decode(), "<html>Real content here.</html>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
378 |
self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
379 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
380 |
# A simple case, a more complex test. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
381 |
text = "<doc><a>1<b>2</b></a><a>i<b>ii</b></a><a>A<b>B</b></a></doc>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
382 |
soup = BeautifulStoneSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
383 |
doc = soup.doc |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
384 |
numbers, roman, letters = soup("a") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
385 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
386 |
self.assertEqual(roman.parent, doc) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
387 |
oldPrevious = roman.previous |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
388 |
endOfThisTag = roman.nextSibling.previous |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
389 |
self.assertEqual(oldPrevious, "2") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
390 |
self.assertEqual(roman.next, "i") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
391 |
self.assertEqual(endOfThisTag, "ii") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
392 |
self.assertEqual(roman.previousSibling, numbers) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
393 |
self.assertEqual(roman.nextSibling, letters) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
394 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
395 |
roman.extract() |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
396 |
self.assertEqual(roman.parent, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
397 |
self.assertEqual(roman.previous, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
398 |
self.assertEqual(roman.next, "i") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
399 |
self.assertEqual(letters.previous, '2') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
400 |
self.assertEqual(roman.previousSibling, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
401 |
self.assertEqual(roman.nextSibling, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
402 |
self.assertEqual(endOfThisTag.next, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
403 |
self.assertEqual(roman.b.contents[0].next, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
404 |
self.assertEqual(numbers.nextSibling, letters) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
405 |
self.assertEqual(letters.previousSibling, numbers) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
406 |
self.assertEqual(len(doc.contents), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
407 |
self.assertEqual(doc.contents[0], numbers) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
408 |
self.assertEqual(doc.contents[1], letters) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
409 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
410 |
# A more complex case. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
411 |
text = "<a>1<b>2<c>Hollywood, baby!</c></b></a>3" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
412 |
soup = BeautifulStoneSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
413 |
one = soup.find(text="1") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
414 |
three = soup.find(text="3") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
415 |
toExtract = soup.b |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
416 |
soup.b.extract() |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
417 |
self.assertEqual(one.next, three) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
418 |
self.assertEqual(three.previous, one) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
419 |
self.assertEqual(one.parent.nextSibling, three) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
420 |
self.assertEqual(three.previousSibling, soup.a) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
421 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
422 |
class TheManWithoutAttributes(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
423 |
"Test attribute access" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
424 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
425 |
def testHasKey(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
426 |
text = "<foo attr='bar'>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
427 |
self.assertTrue(BeautifulSoup(text).foo.has_key('attr')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
428 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
429 |
class QuoteMeOnThat(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
430 |
"Test quoting" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
431 |
def testQuotedAttributeValues(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
432 |
self.assertSoupEquals("<foo attr='bar'></foo>", |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
433 |
'<foo attr="bar"></foo>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
434 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
435 |
text = """<foo attr='bar "brawls" happen'>a</foo>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
436 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
437 |
self.assertEquals(soup.decode(), text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
438 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
439 |
soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
440 |
newText = """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
441 |
self.assertSoupEquals(soup.decode(), newText) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
442 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
443 |
self.assertSoupEquals('<this is="really messed up & stuff">', |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
444 |
'<this is="really messed up & stuff"></this>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
445 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
446 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
447 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
448 |
class YoureSoLiteral(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
449 |
"Test literal mode." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
450 |
def testLiteralMode(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
451 |
text = "<script>if (i<imgs.length)</script><b>Foo</b>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
452 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
453 |
self.assertEqual(soup.script.contents[0], "if (i<imgs.length)") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
454 |
self.assertEqual(soup.b.contents[0], "Foo") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
455 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
456 |
def testTextArea(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
457 |
text = "<textarea><b>This is an example of an HTML tag</b><&<&</textarea>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
458 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
459 |
self.assertEqual(soup.textarea.contents[0], |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
460 |
"<b>This is an example of an HTML tag</b><&<&") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
461 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
462 |
class OperatorOverload(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
463 |
"Our operators do it all! Call now!" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
464 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
465 |
def testTagNameAsFind(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
466 |
"Tests that referencing a tag name as a member delegates to find()." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
467 |
soup = BeautifulSoup('<b id="1">foo<i>bar</i></b><b>Red herring</b>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
468 |
self.assertEqual(soup.b.i, soup.find('b').find('i')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
469 |
self.assertEqual(soup.b.i.string, 'bar') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
470 |
self.assertEqual(soup.b['id'], '1') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
471 |
self.assertEqual(soup.b.contents[0], 'foo') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
472 |
self.assert_(not soup.a) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
473 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
474 |
#Test the .fooTag variant of .foo. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
475 |
self.assertEqual(soup.bTag.iTag.string, 'bar') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
476 |
self.assertEqual(soup.b.iTag.string, 'bar') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
477 |
self.assertEqual(soup.find('b').find('i'), soup.bTag.iTag) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
478 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
479 |
class NestableEgg(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
480 |
"""Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
481 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
482 |
def testParaInsideBlockquote(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
483 |
soup = BeautifulSoup('<blockquote><p><b>Foo</blockquote><p>Bar') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
484 |
self.assertEqual(soup.blockquote.p.b.string, 'Foo') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
485 |
self.assertEqual(soup.blockquote.b.string, 'Foo') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
486 |
self.assertEqual(soup.find('p', recursive=False).string, 'Bar') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
487 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
488 |
def testNestedTables(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
489 |
text = """<table id="1"><tr><td>Here's another table: |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
490 |
<table id="2"><tr><td>Juicy text</td></tr></table></td></tr></table>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
491 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
492 |
self.assertEquals(soup.table.table.td.string, 'Juicy text') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
493 |
self.assertEquals(len(soup.findAll('table')), 2) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
494 |
self.assertEquals(len(soup.table.findAll('table')), 1) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
495 |
self.assertEquals(soup.find('table', {'id' : 2}).parent.parent.parent.name, |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
496 |
'table') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
497 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
498 |
text = "<table><tr><td><div><table>Foo</table></div></td></tr></table>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
499 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
500 |
self.assertEquals(soup.table.tr.td.div.table.contents[0], "Foo") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
501 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
502 |
text = """<table><thead><tr>Foo</tr></thead><tbody><tr>Bar</tr></tbody> |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
503 |
<tfoot><tr>Baz</tr></tfoot></table>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
504 |
soup = BeautifulSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
505 |
self.assertEquals(soup.table.thead.tr.contents[0], "Foo") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
506 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
507 |
def testBadNestedTables(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
508 |
soup = BeautifulSoup("<table><tr><table><tr id='nested'>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
509 |
self.assertEquals(soup.table.tr.table.tr['id'], 'nested') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
510 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
511 |
class CleanupOnAisleFour(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
512 |
"""Here we test cleanup of text that breaks HTMLParser or is just |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
513 |
obnoxious.""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
514 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
515 |
def testSelfClosingtag(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
516 |
self.assertEqual(BeautifulSoup("Foo<br/>Bar").find('br').decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
517 |
'<br />') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
518 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
519 |
self.assertSoupEquals('<p>test1<br/>test2</p>', |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
520 |
'<p>test1<br />test2</p>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
521 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
522 |
text = '<p>test1<selfclosing>test2' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
523 |
soup = BeautifulStoneSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
524 |
self.assertEqual(soup.decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
525 |
'<p>test1<selfclosing>test2</selfclosing></p>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
526 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
527 |
soup = BeautifulStoneSoup(text, selfClosingTags='selfclosing') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
528 |
self.assertEqual(soup.decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
529 |
'<p>test1<selfclosing />test2</p>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
530 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
531 |
def testSelfClosingTagOrNot(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
532 |
text = "<item><link>http://foo.com/</link></item>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
533 |
self.assertEqual(BeautifulStoneSoup(text).decode(), text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
534 |
self.assertEqual(BeautifulSoup(text).decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
535 |
'<item><link />http://foo.com/</item>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
536 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
537 |
def testBooleanAttributes(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
538 |
text = "<td nowrap>foo</td>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
539 |
self.assertSoupEquals(text, text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
540 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
541 |
def testCData(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
542 |
xml = "<root>foo<![CDATA[foobar]]>bar</root>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
543 |
self.assertSoupEquals(xml, xml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
544 |
r = re.compile("foo.*bar") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
545 |
soup = BeautifulSoup(xml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
546 |
self.assertEquals(soup.find(text=r).string, "foobar") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
547 |
self.assertEquals(soup.find(text=r).__class__, CData) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
548 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
549 |
def testComments(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
550 |
xml = "foo<!--foobar-->baz" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
551 |
self.assertSoupEquals(xml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
552 |
r = re.compile("foo.*bar") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
553 |
soup = BeautifulSoup(xml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
554 |
self.assertEquals(soup.find(text=r).string, "foobar") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
555 |
self.assertEquals(soup.find(text="foobar").__class__, Comment) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
556 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
557 |
def testDeclaration(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
558 |
xml = "foo<!DOCTYPE foobar>baz" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
559 |
self.assertSoupEquals(xml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
560 |
r = re.compile(".*foo.*bar") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
561 |
soup = BeautifulSoup(xml) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
562 |
text = "DOCTYPE foobar" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
563 |
self.assertEquals(soup.find(text=r).string, text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
564 |
self.assertEquals(soup.find(text=text).__class__, Declaration) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
565 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
566 |
namespaced_doctype = ('<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
567 |
'<html>foo</html>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
568 |
soup = BeautifulSoup(namespaced_doctype) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
569 |
self.assertEquals(soup.contents[0], |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
570 |
'DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd"') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
571 |
self.assertEquals(soup.html.contents[0], 'foo') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
572 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
573 |
def testEntityConversions(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
574 |
text = "<<sacré bleu!>>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
575 |
soup = BeautifulStoneSoup(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
576 |
self.assertSoupEquals(text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
577 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
578 |
xmlEnt = BeautifulStoneSoup.XML_ENTITIES |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
579 |
htmlEnt = BeautifulStoneSoup.HTML_ENTITIES |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
580 |
xhtmlEnt = BeautifulStoneSoup.XHTML_ENTITIES |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
581 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
582 |
soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
583 |
self.assertEquals(soup.decode(), "<<sacré bleu!>>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
584 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
585 |
soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
586 |
self.assertEquals(soup.decode(), "<<sacré bleu!>>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
587 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
588 |
soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
589 |
self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
590 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
591 |
# Make sure the "XML", "HTML", and "XHTML" settings work. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
592 |
text = "<™'" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
593 |
soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
594 |
self.assertEquals(soup.decode(), u"<™'") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
595 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
596 |
soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
597 |
self.assertEquals(soup.decode(), u"<\u2122'") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
598 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
599 |
soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
600 |
self.assertEquals(soup.decode(), u"<\u2122'") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
601 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
602 |
def testNonBreakingSpaces(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
603 |
soup = BeautifulSoup("<a> </a>", |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
604 |
convertEntities=BeautifulStoneSoup.HTML_ENTITIES) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
605 |
self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
606 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
607 |
def testWhitespaceInDeclaration(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
608 |
self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
609 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
610 |
def testJunkInDeclaration(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
611 |
self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
612 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
613 |
def testIncompleteDeclaration(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
614 |
self.assertSoupEquals('a<!b <p>c') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
615 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
616 |
def testEntityReplacement(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
617 |
self.assertSoupEquals('<b>hello there</b>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
618 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
619 |
def testEntitiesInAttributeValues(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
620 |
self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
621 |
encoding='utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
622 |
self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
623 |
encoding='utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
624 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
625 |
soup = BeautifulSoup('<x t=">™">', |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
626 |
convertEntities=BeautifulStoneSoup.HTML_ENTITIES) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
627 |
self.assertEquals(soup.decode(), u'<x t=">\u2122"></x>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
628 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
629 |
uri = "http://crummy.com?sacré&bleu" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
630 |
link = '<a href="%s"></a>' % uri |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
631 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
632 |
soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
633 |
self.assertEquals(soup.decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
634 |
link.replace("é", u"\xe9")) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
635 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
636 |
uri = "http://crummy.com?sacré&bleu" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
637 |
link = '<a href="%s"></a>' % uri |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
638 |
soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
639 |
self.assertEquals(soup.a['href'], |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
640 |
uri.replace("é", u"\xe9")) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
641 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
642 |
def testNakedAmpersands(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
643 |
html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES} |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
644 |
soup = BeautifulStoneSoup("AT&T ", **html) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
645 |
self.assertEquals(soup.decode(), 'AT&T ') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
646 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
647 |
nakedAmpersandInASentence = "AT&T was Ma Bell" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
648 |
soup = BeautifulStoneSoup(nakedAmpersandInASentence,**html) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
649 |
self.assertEquals(soup.decode(), \ |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
650 |
nakedAmpersandInASentence.replace('&','&')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
651 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
652 |
invalidURL = '<a href="http://example.org?a=1&b=2;3">foo</a>' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
653 |
validURL = invalidURL.replace('&','&') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
654 |
soup = BeautifulStoneSoup(invalidURL) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
655 |
self.assertEquals(soup.decode(), validURL) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
656 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
657 |
soup = BeautifulStoneSoup(validURL) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
658 |
self.assertEquals(soup.decode(), validURL) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
659 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
660 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
661 |
class EncodeRed(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
662 |
"""Tests encoding conversion, Unicode conversion, and Microsoft |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
663 |
smart quote fixes.""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
664 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
665 |
def testUnicodeDammitStandalone(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
666 |
markup = "<foo>\x92</foo>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
667 |
dammit = UnicodeDammit(markup) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
668 |
self.assertEquals(dammit.unicode, "<foo>’</foo>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
669 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
670 |
hebrew = "\xed\xe5\xec\xf9" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
671 |
dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
672 |
self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
673 |
self.assertEquals(dammit.originalEncoding, 'iso-8859-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
674 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
675 |
def testGarbageInGarbageOut(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
676 |
ascii = "<foo>a</foo>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
677 |
asciiSoup = BeautifulStoneSoup(ascii) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
678 |
self.assertEquals(ascii, asciiSoup.decode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
679 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
680 |
unicodeData = u"<foo>\u00FC</foo>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
681 |
utf8 = unicodeData.encode("utf-8") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
682 |
self.assertEquals(utf8, '<foo>\xc3\xbc</foo>') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
683 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
684 |
unicodeSoup = BeautifulStoneSoup(unicodeData) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
685 |
self.assertEquals(unicodeData, unicodeSoup.decode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
686 |
self.assertEquals(unicodeSoup.foo.string, u'\u00FC') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
687 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
688 |
utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
689 |
self.assertEquals(utf8, utf8Soup.encode('utf-8')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
690 |
self.assertEquals(utf8Soup.originalEncoding, "utf-8") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
691 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
692 |
utf8Soup = BeautifulStoneSoup(unicodeData) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
693 |
self.assertEquals(utf8, utf8Soup.encode('utf-8')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
694 |
self.assertEquals(utf8Soup.originalEncoding, None) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
695 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
696 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
697 |
def testHandleInvalidCodec(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
698 |
for bad_encoding in ['.utf8', '...', 'utF---16.!']: |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
699 |
soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
700 |
fromEncoding=bad_encoding) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
701 |
self.assertEquals(soup.originalEncoding, 'utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
702 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
703 |
def testUnicodeSearch(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
704 |
html = u'<html><body><h1>Räksmörgås</h1></body></html>' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
705 |
soup = BeautifulSoup(html) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
706 |
self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
707 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
708 |
def testRewrittenXMLHeader(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
709 |
euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
710 |
utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
711 |
soup = BeautifulStoneSoup(euc_jp) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
712 |
if soup.originalEncoding != "euc-jp": |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
713 |
raise Exception("Test failed when parsing euc-jp document. " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
714 |
"If you're running Python >=2.4, or you have " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
715 |
"cjkcodecs installed, this is a real problem. " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
716 |
"Otherwise, ignore it.") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
717 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
718 |
self.assertEquals(soup.originalEncoding, "euc-jp") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
719 |
self.assertEquals(soup.renderContents('utf-8'), utf8) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
720 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
721 |
old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
722 |
new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
723 |
self.assertSoupEquals(old_text, new_text) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
724 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
725 |
def testRewrittenMetaTag(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
726 |
no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
727 |
soup = BeautifulSoup(no_shift_jis_html) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
728 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
729 |
# Beautiful Soup used to try to rewrite the meta tag even if the |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
730 |
# meta tag got filtered out by the strainer. This test makes |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
731 |
# sure that doesn't happen. |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
732 |
strainer = SoupStrainer('pre') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
733 |
soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
734 |
self.assertEquals(soup.contents[0].name, 'pre') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
735 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
736 |
meta_tag = ('<meta content="text/html; charset=x-sjis" ' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
737 |
'http-equiv="Content-type" />') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
738 |
shift_jis_html = ( |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
739 |
'<html><head>\n%s\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
740 |
'<meta http-equiv="Content-language" content="ja" />' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
741 |
'</head><body><pre>\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
742 |
'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
743 |
'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
744 |
'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
745 |
'</pre></body></html>') % meta_tag |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
746 |
soup = BeautifulSoup(shift_jis_html) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
747 |
if soup.originalEncoding != "shift-jis": |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
748 |
raise Exception("Test failed when parsing shift-jis document " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
749 |
"with meta tag '%s'." |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
750 |
"If you're running Python >=2.4, or you have " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
751 |
"cjkcodecs installed, this is a real problem. " |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
752 |
"Otherwise, ignore it." % meta_tag) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
753 |
self.assertEquals(soup.originalEncoding, "shift-jis") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
754 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
755 |
content_type_tag = soup.meta['content'] |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
756 |
self.assertEquals(content_type_tag[content_type_tag.find('charset='):], |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
757 |
'charset=%SOUP-ENCODING%') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
758 |
content_type = str(soup.meta) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
759 |
index = content_type.find('charset=') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
760 |
self.assertEqual(content_type[index:index+len('charset=utf8')+1], |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
761 |
'charset=utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
762 |
content_type = soup.meta.encode('shift-jis') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
763 |
index = content_type.find('charset=') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
764 |
self.assertEqual(content_type[index:index+len('charset=shift-jis')], |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
765 |
'charset=shift-jis'.encode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
766 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
767 |
self.assertEquals(soup.encode('utf-8'), ( |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
768 |
'<html><head>\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
769 |
'<meta content="text/html; charset=utf-8" ' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
770 |
'http-equiv="Content-type" />\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
771 |
'<meta http-equiv="Content-language" content="ja" />' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
772 |
'</head><body><pre>\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
773 |
'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
774 |
'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
775 |
'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
776 |
'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
777 |
'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
778 |
'</pre></body></html>')) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
779 |
self.assertEquals(soup.encode("shift-jis"), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
780 |
shift_jis_html.replace('x-sjis'.encode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
781 |
'shift-jis'.encode())) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
782 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
783 |
isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
784 |
soup = BeautifulSoup(isolatin) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
785 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
786 |
utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
787 |
utf8 = utf8.replace("\xe9", "\xc3\xa9") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
788 |
self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
789 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
790 |
def testHebrew(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
791 |
iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
792 |
utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
793 |
soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
794 |
self.assertEquals(soup.encode('utf-8'), utf8) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
795 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
796 |
def testSmartQuotesNotSoSmartAnymore(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
797 |
self.assertSoupEquals("\x91Foo\x92 <!--blah-->", |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
798 |
'‘Foo’ <!--blah-->') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
799 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
800 |
def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
801 |
smartQuotes = "Il a dit, \x8BSacré bleu!\x9b" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
802 |
soup = BeautifulSoup(smartQuotes) |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
803 |
self.assertEquals(soup.decode(), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
804 |
'Il a dit, ‹Sacré bleu!›') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
805 |
soup = BeautifulSoup(smartQuotes, convertEntities="html") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
806 |
self.assertEquals(soup.encode('utf-8'), |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
807 |
'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
808 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
809 |
def testDontSeeSmartQuotesWhereThereAreNone(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
810 |
utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
811 |
self.assertSoupEquals(utf_8, encoding='utf-8') |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
812 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
813 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
814 |
class Whitewash(SoupTest): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
815 |
"""Test whitespace preservation.""" |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
816 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
817 |
def testPreservedWhitespace(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
818 |
self.assertSoupEquals("<pre> </pre>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
819 |
self.assertSoupEquals("<pre> woo </pre>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
820 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
821 |
def testCollapsedWhitespace(self): |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
822 |
self.assertSoupEquals("<p> </p>", "<p> </p>") |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
823 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
824 |
|
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
825 |
if __name__ == '__main__': |
b3daada52dd3
Add BeautifulSoup Python HTML/XML parser to Melange repository.
Pawel Solyga <Pawel.Solyga@gmail.com>
parents:
diff
changeset
|
826 |
unittest.main() |