|
1 """ |
|
2 module for generating and serializing xml and html structures |
|
3 by using simple python objects. |
|
4 |
|
5 (c) holger krekel, holger at merlinux eu. 2009 |
|
6 """ |
|
7 import py |
|
8 import sys, re |
|
9 |
|
# Text helpers selected once at import time, depending on the interpreter.
if sys.version_info < (3, 0):
    def u(s):
        """Return ``unicode(s)``."""
        return unicode(s)
    # bind the name at module level so it is available as a module global
    unicode = unicode
else:
    def u(s):
        """Return ``s`` unchanged."""
        return s

    def unicode(x):
        """Return ``x.__unicode__()`` when ``x`` has that method, else ``str(x)``."""
        try:
            to_text = x.__unicode__
        except AttributeError:
            return str(x)
        return to_text()
|
21 |
|
22 |
|
class NamespaceMetaclass(type):
    """Metaclass that creates tag classes on first attribute access.

    Looking up an unknown public attribute on a namespace class builds a
    new class derived from the namespace's ``__tagclass__``, stores it on
    the namespace and returns it.
    """
    def __getattr__(self, name):
        """Create, cache and return the tag class called ``name``.

        Raises AttributeError for names starting with an underscore and
        for names missing from a non-None ``__tagspec__``; raises
        ValueError when invoked on the ``Namespace`` class itself.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        if self == Namespace:
            raise ValueError("Namespace class is abstract")
        allowed = self.__tagspec__
        if not (allowed is None or name in allowed):
            raise AttributeError(name)
        # sticky namespaces record the tag name on the generated class
        if self.__stickyname__:
            members = {'xmlname': name}
        else:
            members = {}
        tagcls = type(name, (self.__tagclass__,), members)
        # store it so later lookups find the class without __getattr__
        setattr(self, name, tagcls)
        return tagcls
|
38 |
|
class Tag(list):
    """A list of child nodes with an ``attr`` object holding attributes.

    Positional arguments become the list items, keyword arguments become
    attributes of ``self.attr``.
    """

    class Attr(object):
        """Plain holder: every keyword argument becomes an instance attribute."""
        def __init__(self, **kwargs):
            vars(self).update(kwargs)

    def __init__(self, *args, **kwargs):
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def unicode(self, indent=2):
        """Serialize this tag with SimpleUnicodeVisitor and return the text."""
        chunks = []
        visitor = SimpleUnicodeVisitor(chunks.append, indent)
        visitor.visit(self)
        return "".join(chunks)

    def __unicode__(self):
        """Serialize with ``indent=0``."""
        return self.unicode(indent=0)
    __str__ = __unicode__

    def __repr__(self):
        return "<%r tag object %d>" % (self.__class__.__name__, id(self))
|
60 |
|
# Root namespace class, built by calling the metaclass directly.
# Defaults: no tag restriction, plain Tag classes, no sticky xmlname.
Namespace = NamespaceMetaclass(
    'Namespace',
    (object,),
    dict(__tagspec__=None, __tagclass__=Tag, __stickyname__=False)
)
|
66 |
|
class HtmlTag(Tag):
    """Tag that is serialized by HtmlVisitor instead of the plain visitor."""
    def unicode(self, indent=2):
        """Serialize this tag via HtmlVisitor (``shortempty=False``)."""
        pieces = []
        serializer = HtmlVisitor(pieces.append, indent, shortempty=False)
        serializer.visit(self)
        return u("").join(pieces)
|
72 |
|
# exported plain html namespace
class html(Namespace):
    """Namespace of HtmlTag classes, restricted to the names in __tagspec__."""
    __tagclass__ = HtmlTag
    __stickyname__ = True
    # maps each allowed tag name to 1; empty split results are dropped
    __tagspec__ = dict.fromkeys(filter(None, (
        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
        'blockquote,body,br,button,caption,center,cite,code,col,'
        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
        'select,small,span,strike,strong,style,sub,sup,table,'
        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
        'base,basefont,frame,hr,isindex,param,samp,var'
    ).split(',')), 1)
|
89 |
|
class Style(object):
    """Holder for style properties given as keyword arguments.

    Underscores in keyword names are replaced by dashes before the value
    is stored as an instance attribute.
    """
    def __init__(self, **kw):
        for key in kw:
            setattr(self, key.replace('_', '-'), kw[key])
|
95 |
|
96 |
|
class raw(object):
    """Wrapper marking a unicode string for verbatim inclusion in the output.

    The wrapped string is stored as ``uniobj``.
    """
    def __init__(self, uniobj):
        self.uniobj = uniobj
|
102 |
|
class SimpleUnicodeVisitor(object):
    """ recursive visitor to write unicode.

    Each node is dispatched to the visitor method whose name equals the
    name of the first class in the node's MRO for which such a method
    exists.  All output is passed piecewise to the ``write`` callable.
    """
    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        """
        write      -- callable invoked with each piece of output text
        indent     -- amount added to the current indentation per nested tag
        curindent  -- starting indentation
        shortempty -- default answer of _issingleton(), i.e. whether empty
                      tags are written in the short ``<name/>`` form
        """
        self.write = write
        self.cache = {}      # node class -> visit method
        self.visited = {}    # for detection of recursion
        self.indent = indent
        self.curindent = curindent
        self.parents = []    # stack of tags currently being written
        self.shortempty = shortempty # short empty tags or not

    def visit(self, node):
        """ dispatcher on node's class/bases name. """
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            # walk the MRO and take the first class name we have a method for
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.object
            self.cache[cls] = visitmethod
        visitmethod(node)

    def object(self, obj):
        """ write the escaped unicode conversion of an arbitrary object. """
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        """ write obj.uniobj as is, without escaping. """
        self.write(obj.uniobj)

    def list(self, obj):
        """ visit every element of a plain list, in order. """
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        # explicit loop: map() is lazy on Python 3, so calling it only for
        # its side effects would never visit any element
        for elem in obj:
            self.visit(elem)

    def Tag(self, tag):
        """ write a tag together with its attributes and children.

        Sets ``tag.parent`` to the tag currently being written (None at
        the outermost level).  A tag with children is written as
        ``<name attrs>...</name>``; an empty one as ``<name attrs/>`` if
        _issingleton() says so and as ``<name attrs></name>`` otherwise.
        """
        assert id(tag) not in self.visited
        try:
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        # non-inline tags start on a fresh, indented line (only when indenting)
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            for x in tag:
                self.visit(x)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            nameattr = tagname+self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        """ return the serialized attributes of tag.attr (sorted by name),
        followed by the style attribute from getstyle(). """
        # serialize attributes
        attrlist = dir(tag.attr)
        attrlist.sort()
        l = []
        for name in attrlist:
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                l.append(res)
        l.extend(self.getstyle(tag))
        return u("").join(l)

    def repr_attribute(self, attrs, name):
        """ return ' name="value"' with the value escaped, or None for
        names starting with two underscores.  A single trailing
        underscore is stripped from the written name. """
        if name[:2] != '__':
            value = getattr(attrs, name)
            if name.endswith('_'):
                name = name[:-1]
            return ' %s="%s"' % (name, escape(unicode(value)))

    def getstyle(self, tag):
        """ return attribute list suitable for styling. """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            # tag has no usable ``style`` attribute: nothing to add
            return []
        else:
            stylelist = [x+': ' + y for x,y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False
|
203 |
|
class HtmlVisitor(SimpleUnicodeVisitor):
    """Visitor variant that consults HTML tag-name tables.

    ``single`` holds the names reported by _issingleton(), ``inline`` the
    names reported by _isinline().
    """

    single = dict.fromkeys(
        ('br,img,area,param,col,hr,meta,link,base,'
         'input,frame').split(','), 1)
    inline = dict.fromkeys(
        ('a abbr acronym b basefont bdo big br cite code dfn em font '
         'i img input kbd label q s samp select small span strike '
         'strong sub sup textarea tt u var'.split(' ')), 1)

    def repr_attribute(self, attrs, name):
        """Like the base implementation, but omit ``class_`` when it is None."""
        if name == 'class_' and getattr(attrs, name) is None:
            return None
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        """Return whether ``tagname`` is listed in ``single``."""
        return tagname in self.single

    def _isinline(self, tagname):
        """Return whether ``tagname`` is listed in ``inline``."""
        return tagname in self.inline
|
226 |
|
227 |
|
class _escape:
    """Callable that replaces the characters ``"``, ``<``, ``>``, ``&`` and
    ``'`` by the corresponding predefined XML entity references."""
    def __init__(self):
        # character -> entity reference
        self.escape = {
            u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'),
            u('&') : u('&amp;'), u("'") : u('&apos;'),
            }
        # alternation of the five characters; none of them is special
        # in a regular expression, so no re.escape() is needed
        self.charef_rex = re.compile(u("|").join(self.escape.keys()))

    def _replacer(self, match):
        """ return the entity reference for the matched character. """
        return self.escape[match.group(0)]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        ustring = unicode(ustring)
        return self.charef_rex.sub(self._replacer, ustring)

# module-level escaping function used by the visitors
escape = _escape()