1 """This module encapsulates a document description stored in an XML file.
2
3 This is mainly used by GNUmed/Archive.
4
5 @copyright: GPL v2 or later
6 """
7
8 __author__ = "Karsten Hilbert <Karsten.Hilbert@gmx.net>"
9
10 import sys
11 import os.path
12 import fileinput
13 import string
14 import logging
15
16
17 _log = logging.getLogger('gm.docs')
18
20
# Registry of fallback getters: maps an item name to a callable that is
# invoked with the instance when the item is not found in the instance's
# data cache.
_get_handler = {}
22
def __init__(self, aBaseDir = None, aCfg = None, aGroup = 'metadata'):
    """Set up access to the XML description file of one document.

    aBaseDir -- directory holding the document; must be given and must exist
    aCfg     -- config object offering get(group, option); when None,
                gmCfg.gmDefCfgFile is used instead
    aGroup   -- name of the config group the options are read from

    Raises ConstructorError when no base directory is given, when it does
    not exist, or when the description file named by the "description"
    option cannot be found inside the base directory.
    """
    # the call form of raise is valid on both Python 2 and Python 3
    if aBaseDir is None:
        raise ConstructorError("need document path")
    if not os.path.exists(os.path.abspath(aBaseDir)):
        raise ConstructorError("document path [%s] does not exist" % aBaseDir)
    self.__base_dir = aBaseDir
    _log.debug("working from directory [%s]", self.__base_dir)

    if aCfg is None:
        _log.warning('no config file specified')
        # imported lazily: only needed when the caller supplies no config
        import gmCfg
        self.__cfg = gmCfg.gmDefCfgFile
    else:
        self.__cfg = aCfg

    self.__group = str(aGroup)

    description_name = self.__cfg.get(self.__group, "description")
    # guard in case the config lookup yields None; os.path.join() would
    # otherwise fail with an unrelated error instead of ConstructorError
    if description_name is None:
        raise ConstructorError("skipping [%s]: no description file configured in group [%s]" % (self.__base_dir, self.__group))
    self.__xml_file = os.path.join(self.__base_dir, description_name)
    if not os.path.exists(self.__xml_file):
        raise ConstructorError("skipping [%s]: description file [%s] not found" % (self.__base_dir, description_name))

    # cache for values retrieved from the description file
    self.__data = {}
52
102
103
104
# NOTE(review): the `def` line is absent from this copy of the file; the name
# `__getitem__` and the `(self, item)` signature are inferred from the body
# and from the `_get_handler` registry -- confirm against the full source.
def __getitem__(self, item):
    """Return the value for *item*, computing it on demand if necessary.

    Lookup order:
      1. the per-instance cache self.__data
      2. the class-level _get_handler registry, whose entry for *item* is
         called with this instance

    Returns None, after logging the failure, when the second step raises
    KeyError (no handler registered, or a KeyError inside the handler).
    """
    try:
        return self.__data[item]
    except KeyError:
        try:
            return xmlDocDesc._get_handler[item](self)
        except KeyError:
            # exception() attaches the active exception itself; extra
            # positional arguments must match placeholders in the message,
            # so the item is passed as a lazy %-argument and nothing else
            _log.exception('[%s] neither cached in self.__data nor get handler available', item)
            return None
114
# NOTE(review): the `def` line is absent from this copy of the file; the name
# is taken from the `_get_handler['objects'] = _get_obj_list` registration.
def _get_obj_list(self):
    """Return the dict of document objects, loading it on first use.

    The list is cached under the 'objects' key of self.__data; when it is
    not there yet, self.__load_obj_list() is called to populate it.
    """
    if 'objects' not in self.__data:
        self.__load_obj_list()
    return self.__data['objects']
122
# Register the getter under the 'objects' key of the handler registry so that
# an item lookup that misses the instance cache can obtain the object list.
_get_handler['objects'] = _get_obj_list
124
# NOTE(review): the `def` line is absent from this copy of the file; the name
# is taken from the `self.__load_obj_list()` call in the object-list getter.
def __load_obj_list(self):
    """Read list of image files from XML metadata file.

    We assume the order of file names to correspond to the sequence of pages.
    - don't use self.__get_from_xml, because we want to
      scan lines sequentially here

    The tag to look for is read from the "obj_tag" option of the config
    group. self.__data['objects'] is replaced by a dict keyed by the
    1-based position of each match; every value is a dict holding the
    absolute 'file name' (resolved against the base directory) and that
    'index'.

    Returns 1 when at least one file was found, None otherwise.
    """
    self.__data['objects'] = {}
    tag_name = self.__cfg.get(self.__group, "obj_tag")

    idx = 0
    try:
        for line in fileinput.input(self.__xml_file):
            content = self.__extract_xml_content(line, tag_name)
            if content is None:
                continue
            idx += 1
            self.__data['objects'][idx] = {
                'file name': os.path.abspath(os.path.join(self.__base_dir, content)),
                'index': idx,
            }
    finally:
        # fileinput keeps module-global state: without closing it, the next
        # fileinput.input() call fails after an error inside the loop
        fileinput.close()

    if idx == 0:
        _log.warning("no files found for import")
        return None

    _log.debug("document data files to be processed: %s", self.__data['objects'])

    return 1
160
161
162
# NOTE(review): the `def` line is absent from this copy of the file; the name
# is taken from the printed message and the parameter from the body --
# confirm against the full source.
def remove_object(self, anObjID=None):
    """Placeholder for removing an object from the document (not implemented).

    Only prints a FIXME marker and the given object ID, then returns 1.
    """
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print("remove_object: FIXME !!")
    print(anObjID)
    return 1
167
169
# NOTE(review): the `def` line is absent from this copy of the file; the name
# is taken from the `self.__get_from_xml` mention in a sibling docstring and
# the parameter from the body. The original may take further parameters --
# confirm against the full source.
def __get_from_xml(self, aTag=None):
    """Return the text enclosed by <aTag> ... </aTag> in the XML file.

    The file is scanned line by line, so the element may span several
    lines. Scanning stops at the first closing tag that follows an
    opening tag; a closing tag seen before any opening tag is ignored.

    Returns a list of strings: the remainder of the line after the
    opening tag, every following line as read, and finally the part of
    the closing line in front of the closing tag. Returns None when aTag
    is not a string or when nothing was collected.
    """
    if not isinstance(aTag, str):
        _log.error("Argument aTag (" + str(aTag) + ") is not a string.")
        return None

    tag_start = "<" + aTag + ">"
    tag_end = "</" + aTag + ">"

    _log.info("Retrieving " + tag_start + "content" + tag_end + ".")

    in_tag = False
    content = []

    try:
        for line in fileinput.input(self.__xml_file):
            remainder = line

            # this line opens the tag: keep only what follows it
            if tag_start in remainder:
                in_tag = True
                junk, remainder = remainder.split(tag_start, 1)
                _log.debug("Found tag start in line: junk='%s' content='%s'", junk, remainder)

            # this line closes a tag we are inside of: keep what precedes
            # the closing tag and stop scanning
            if in_tag and tag_end in remainder:
                good_stuff, junk = remainder.split(tag_end, 1)
                _log.debug("Found tag end in line: junk='%s' content='%s'", junk, good_stuff)
                content.append(good_stuff)
                break

            # inside the tag but not at its end yet
            if in_tag:
                content.append(remainder)
    finally:
        # fileinput keeps module-global state; always release it
        fileinput.close()

    if not content:
        return None

    _log.debug("%s tag content successfully read: %s", tag_start, str(content))
    return content
217
218
220
# NOTE(review): the `def` line is absent from this copy of the file; the name
# is taken from the `self.__extract_xml_content(line, tag_name)` call and the
# parameter names from the body.
def __extract_xml_content(self, aLine, aTag):
    """Return the content of the element *aTag* found on the single line *aLine*.

    The opening tag is located by the prefix '<aTag', so it may carry
    attributes; the content runs from the first '>' after that position up
    to '</aTag>'. Multi-line elements are not supported.

    Returns None when the line holds no opening tag, no closing tag, or no
    '>' between the two; the latter two cases are logged as errors.
    """
    # NOTE(review): the prefix match also accepts longer tag names that
    # merely start with aTag (e.g. '<imagelist' for 'image').
    start_tag_pos = aLine.find('<%s' % aTag)
    if start_tag_pos == -1:
        return None

    end_tag_pos = aLine.find('</%s>' % aTag)
    if end_tag_pos == -1:
        _log.error("Line [%s] is incomplete for tag [%s]. We don't do multiline tags here.", aLine, aTag)
        return None

    start_tag_close = aLine.find('>', start_tag_pos, end_tag_pos)
    if start_tag_close == -1:
        # without this guard the slice below would start at 0 and return
        # whatever precedes the closing tag
        _log.error("Line [%s] has no complete start tag for [%s] in front of the end tag.", aLine, aTag)
        return None

    return aLine[start_tag_close + 1:end_tag_pos]
233
234
235
236