1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base, poparser
30 from translate.storage.pocommon import encodingToUse
31 import re
32 import copy
33 import cStringIO
34
35 lsep = "\n#: "
36 """Seperator for #: entries"""
37
38
39
40 po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
41 po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
42
def escapeforpo(line):
    """Escapes a line for po format. Assumes no newline occurs in the line.

    Each character that is a key of the module-level po_escape_map is
    replaced by the escape sequence it maps to; every other character is
    copied through unchanged.

    @param line: unescaped text
    @return: the escaped text
    """
    # The keys of po_escape_map are single characters, so one lookup per
    # character replaces the find-all / de-duplicate / sort passes, and a
    # single join replaces the repeated string concatenation.
    # line[:0] is an empty string of the same type as line, which keeps the
    # result type in step with the input (byte string or unicode).
    return line[:0].join([po_escape_map.get(char, char) for char in line])
61
65
67 """Wrap text for po files."""
68 wrappedlines = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
69
70
71 if len(wrappedlines) > 1:
72 for index, line in enumerate(wrappedlines[1:]):
73 if line.startswith(' '):
74
75 wrappedlines[index+1] = line[1:]
76
77
78 wrappedlines[index] += ' '
79 return wrappedlines
80
82 """quotes the given text for a PO file, returning quoted and escaped lines"""
83 polines = []
84 if text is None:
85 return polines
86 lines = text.split("\n")
87 if len(lines) > 1 or (len(lines) == 1 and len(lines[0]) > 71):
88 if len(lines) != 2 or lines[1]:
89 polines.extend(['""'])
90 for line in lines[:-1]:
91
92 lns = wrapline(line)
93 if len(lns) > 0:
94 for ln in lns[:-1]:
95 polines.extend(['"' + escapeforpo(ln) + '"'])
96 if lns[-1]:
97 polines.extend(['"' + escapeforpo(lns[-1]) + '\\n"'])
98 else:
99 polines.extend(['"\\n"'])
100 if lines[-1]:
101 polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])])
102 return polines
103
105 """Remove quote and unescape line from po file.
106
107 @param line: a quoted line from a po file (msgid or msgstr)
108 """
109 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
110 return extracted
111
114
def is_null(lst):
    """Tell whether a list of quoted PO lines carries no text.

    @param lst: the quoted lines of one message part
    @return: True for an empty list, or for a one-element container whose
        element at index 0 is the empty quoted string '""'
    """
    if lst == []:
        return True
    if len(lst) != 1:
        return False
    return lst[0] == '""'
117
119 left = string.find('"')
120 right = string.rfind('"')
121 if right > -1:
122 return string[left:right+1]
123 else:
124 return string[left:] + '"'
125
126 -class pounit(pocommon.pounit):
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141 __shallow__ = ['_store']
142
def __init__(self, source=None, encoding="UTF-8"):
    """Set up an empty unit and hand source to the parent initialiser.

    @param source: passed unchanged to pocommon.pounit.__init__
    @param encoding: encoding name, normalised through encodingToUse()
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Every message part, current, previous and obsolete, starts out as its
    # own fresh empty list.
    part_names = (
        "prev_msgctxt",
        "prev_msgid",
        "prev_msgid_plural",
        "msgctxt",
        "msgid",
        "msgid_pluralcomments",
        "msgid_plural",
        "msgstr",
        "obsoletemsgctxt",
        "obsoletemsgid",
        "obsoletemsgid_pluralcomments",
        "obsoletemsgid_plural",
        "obsoletemsgstr",
    )
    for part_name in part_names:
        setattr(self, part_name, [])
    # The parent initialiser runs last, once all the parts exist.
    pocommon.pounit.__init__(self, source)
161
171
179
180 allcomments = property(_get_all_comments)
181
190
208
212
214 """Sets the msgid to the given (unescaped) value.
215
216 @param source: an unescaped source string.
217 """
218 self._rich_source = None
219 self.msgid, self.msgid_plural = self._set_source_vars(source)
220 source = property(getsource, setsource)
221
223 """Returns the unescaped msgid"""
224 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
225
227 """Sets the msgid to the given (unescaped) value.
228
229 @param source: an unescaped source string.
230 """
231 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
232 prev_source = property(_get_prev_source, _set_prev_source)
233
241
243 """Sets the msgstr to the given (unescaped) value"""
244 self._rich_target = None
245 if isinstance(target, str):
246 target = target.decode(self._encoding)
247 if self.hasplural():
248 if isinstance(target, multistring):
249 target = target.strings
250 elif isinstance(target, basestring):
251 target = [target]
252 elif isinstance(target, (dict, list)):
253 if len(target) == 1:
254 target = target[0]
255 else:
256 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
257 templates = self.msgstr
258 if isinstance(templates, list):
259 templates = {0: templates}
260 if isinstance(target, list):
261 self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))])
262 elif isinstance(target, dict):
263 self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
264 else:
265 self.msgstr = quoteforpo(target)
266 target = property(gettarget, settarget)
267
# NOTE(review): the def line is missing from the listing this was taken from;
# the signature follows the toolkit's unit API - confirm against the file.
def getnotes(self, origin=None):
    """Return comments based on origin value.

    @param origin: None returns the translator comments followed by the
        automatic comments; "translator" returns only the translator
        comments; "programmer", "developer" or "source code" returns only
        the automatic comments.
    @return: the selected comment lines joined into one string, with the
        first two ("# ") or three ("#. ") characters of each line removed
        and the final character of the result dropped
    @raise ValueError: if origin is any other value
    """
    if origin is None:
        comments = u"".join([comment[2:] for comment in self.othercomments])
        comments += u"".join([comment[3:] for comment in self.automaticcomments])
    elif origin == "translator":
        comments = u"".join([comment[2:] for comment in self.othercomments])
    elif origin in ["programmer", "developer", "source code"]:
        comments = u"".join([comment[3:] for comment in self.automaticcomments])
    else:
        raise ValueError("Comment type not valid")
    # Drop the last character - normally the newline ending the final line.
    return comments[:-1]
281
def addnote(self, text, origin=None, position="append"):
    """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note; empty or whitespace-only text is ignored
    @param origin: "programmer", "developer" or "source code" stores the
        note in automaticcomments with a "#. " prefix; any other value
        stores it in othercomments with a "# " prefix
    @param position: "append" puts the new lines after the existing ones,
        "prepend" puts them before; with any other value the new lines
        replace the existing ones
    """
    if not text or not text.strip():
        return
    text = data.forceunicode(text)

    is_automatic = origin in ["programmer", "developer", "source code"]
    if is_automatic:
        existing = self.automaticcomments
        marker = "#. "
    else:
        existing = self.othercomments
        marker = "# "

    # One comment line per line of the note.
    added = []
    for note_line in text.split("\n"):
        added.append(marker + note_line + "\n")

    if position == "append":
        combined = existing + added
    elif position == "prepend":
        combined = added + existing
    else:
        combined = added

    if is_automatic:
        self.automaticcomments = combined
    else:
        self.othercomments = combined
306
308 """Remove all the translator's notes (other comments)"""
309 self.othercomments = []
310
312
# NOTE(review): the def line is missing from the listing this was taken from;
# the name follows the copy protocol used by the body - confirm against the file.
def __deepcopy__(self, memo=None):
    """Return a deep copy of this unit (hook used by copy.deepcopy).

    Attributes named in __shallow__ are shared with the original; every
    other instance attribute is deep-copied.

    @param memo: the memo dictionary handed in by copy.deepcopy(); a new
        one is created when it is omitted
    @return: the new unit
    """
    if memo is None:
        memo = {}

    # Make an instance to serve as the copy.
    new_unit = self.__class__()

    # Register the copy before descending into the attributes: the memo maps
    # an original object to its copy, so anything that refers back to this
    # unit resolves to new_unit instead of being copied a second time.
    memo[id(self)] = new_unit

    shallow = set(self.__shallow__)

    # Deep-copy everything not listed as shallow, sharing the memo so that
    # objects referenced more than once are copied only once.
    for key, value in self.__dict__.items():
        if key not in shallow:
            setattr(new_unit, key, copy.deepcopy(value, memo))

    # Shallow members are carried across by reference.
    for key in shallow:
        setattr(new_unit, key, getattr(self, key))

    return new_unit
329
331 return copy.deepcopy(self)
332
338
345
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    @param otherpo: the unit to merge from; if it is not a pounit the call
        is delegated to the parent class's merge() and nothing else is done
    @param overwrite: take otherpo's target even if this unit is translated
    @param comments: merge otherpo's comment lists into this unit's lists
    @param authoritative: when true, otherpo's automatic, msgid and source
        comments are left out of the comment merge
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation - confirm it against the file, in particular which
    # merges are guarded by `not authoritative`.

    def mergelists(list1, list2, split=False):
        # Merge list2 into list1 in place.
        # If either list holds a unicode item, decode the byte strings of
        # both lists as UTF-8 first (this modifies list2 as well).
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for position, item in enumerate(list1):
                if isinstance(item, str):
                    list1[position] = item.decode("utf-8")
            for position, item in enumerate(list2):
                if isinstance(item, str):
                    list2[position] = item.decode("utf-8")

        # Take the line ending from the first item of list1; the candidates
        # are tried in order and the last one that matches wins.  An empty
        # list1 (or an empty first item) falls back to "\n".
        lineend = ""
        if list1 and list1[0]:
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
            if not lineend:
                lineend = ""
        else:
            lineend = "\n"

        if split:
            # Work on whitespace-separated tokens: the first token of each
            # item is its prefix, the remaining tokens are the entries.
            splitlist1 = []
            splitlist2 = []
            prefix = "#"
            for item in list1:
                splitlist1.extend(item.split()[1:])
                prefix = item.split()[0]
            for item in list2:
                splitlist2.extend(item.split()[1:])
                prefix = item.split()[0]
            # Entries of list2 that list1 lacks are appended one per line,
            # using the last prefix seen.
            list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
        else:
            # Plain merge: each item of list2 is re-terminated with list1's
            # line ending and appended unless list1 already contains it.
            if list1 != list2:
                for item in list2:
                    if lineend:
                        item = item.rstrip() + lineend
                    # Items shorter than 5 characters are appended even when
                    # they are already present.
                    if item not in list1 or len(item) < 5:
                        list1.append(item)
    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return
    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
    if not self.istranslated() or overwrite:
        # Strip a leading "_: <comment>\n" from otherpo's target before
        # taking it over.  Note that this assigns to otherpo.target, so the
        # unit passed in is modified too.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
        self.target = otherpo.target
        # A different source or context makes the result fuzzy; otherwise
        # the fuzzy state is copied from otherpo.
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        # Own translation is kept; only a changed source marks it fuzzy.
        if self.source != otherpo.source:
            self.markfuzzy()
    else:
        # Both units are translated and overwrite is off: keep the own
        # target, flagging it fuzzy when the two targets differ.
        if self.target != otherpo.target:
            self.markfuzzy()
422
424
425
426 return (is_null(self.msgid)
427 and not is_null(self.msgstr)
428 and self.msgidcomments == []
429 and is_null(self.msgctxt)
430 )
431
433 if self.isheader() or len(self.msgidcomments):
434 return False
435 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
436 return True
437 return False
438
439
440
441
446
454
468
471
474
477
479 """Makes this unit obsolete"""
480 self.obsolete = True
481 if self.msgctxt:
482 self.obsoletemsgctxt = self.msgctxt
483 if self.msgid:
484 self.obsoletemsgid = self.msgid
485 self.msgid = []
486 if self.msgidcomments:
487 self.obsoletemsgidcomments = self.msgidcomments
488 self.msgidcomments = []
489 if self.msgid_plural:
490 self.obsoletemsgid_plural = self.msgid_plural
491 self.msgid_plural = []
492 if self.msgstr:
493 self.obsoletemsgstr = self.msgstr
494 self.msgstr = []
495 self.sourcecomments = []
496 self.automaticcomments = []
497
# NOTE(review): the def line is missing from the listing this was taken from;
# the name follows the toolkit's unit API - confirm against the file.
def resurrect(self):
    """Makes an obsolete unit normal.

    Clears the obsolete flag and moves every non-empty obsolete part back
    into its live counterpart, leaving the obsolete part empty.
    """
    self.obsolete = False
    # (live attribute, obsolete attribute) pairs.  The message context goes
    # back into msgctxt, not msgid, and the obsolete msgstr is cleared under
    # its correct name, so no stale copy or stray attribute is left behind.
    restored_parts = (
        ("msgctxt", "obsoletemsgctxt"),
        ("msgid", "obsoletemsgid"),
        ("msgidcomments", "obsoletemsgidcomments"),
        ("msgid_plural", "obsoletemsgid_plural"),
        ("msgstr", "obsoletemsgstr"),
    )
    for live_name, obsolete_name in restored_parts:
        obsolete_value = getattr(self, obsolete_name)
        if obsolete_value:
            setattr(self, live_name, obsolete_value)
            setattr(self, obsolete_name, [])
516
518 """returns whether this pounit contains plural strings..."""
519 return len(self.msgid_plural) > 0
520
523
525 if isinstance(partlines, dict):
526 partkeys = partlines.keys()
527 partkeys.sort()
528 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
529 partstr = partname + " "
530 partstartline = 0
531 if len(partlines) > 0 and len(partcomments) == 0:
532 partstr += partlines[0]
533 partstartline = 1
534 elif len(partcomments) > 0:
535 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
536
537 partstr += partlines[0] + '\n'
538
539 if len(partlines) > 1:
540 partstartline += 1
541 else:
542
543 partstr += '""\n'
544
545 if len(partcomments) > 1:
546 combinedcomment = []
547 for comment in partcomments:
548 comment = unquotefrompo([comment])
549 if comment.startswith("_:"):
550 comment = comment[len("_:"):]
551 if comment.endswith("\\n"):
552 comment = comment[:-len("\\n")]
553
554 combinedcomment.append(comment)
555 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
556
557 partstr += "\n".join(partcomments)
558 partstr = quote.rstripeol(partstr)
559 else:
560 partstr += '""'
561 partstr += '\n'
562
563 for partline in partlines[partstartline:]:
564 partstr += partline + '\n'
565 return partstr
566
568 """encodes unicode strings and returns other strings unchanged"""
569 if isinstance(output, unicode):
570 encoding = encodingToUse(getattr(self, "encoding", "UTF-8"))
571 return output.encode(encoding)
572 return output
573
575 """convert to a string. double check that unicode is handled somehow here"""
576 output = self._getoutput()
577 return self._encodeifneccessary(output)
578
580 """return this po element as a string"""
581 def add_prev_msgid_lines(lines, header, var):
582 if len(var) > 0:
583 lines.append("#| %s %s\n" % (header, var[0]))
584 lines.extend("#| %s\n" % line for line in var[1:])
585
586 def add_prev_msgid_info(lines):
587 add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
588 add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
589 add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)
590
591 lines = []
592 lines.extend(self.othercomments)
593 if self.isobsolete():
594 lines.extend(self.typecomments)
595 obsoletelines = []
596 if self.obsoletemsgctxt:
597 obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
598 obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
599 if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
600 obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
601 obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
602 for index, obsoleteline in enumerate(obsoletelines):
603
604 obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
605 lines.extend(obsoletelines)
606 lines = [self._encodeifneccessary(line) for line in lines]
607 return "".join(lines)
608
609
610 if is_null(self.msgid):
611 if not (self.isheader() or self.getcontext() or self.sourcecomments):
612 return "".join(lines)
613 lines.extend(self.automaticcomments)
614 lines.extend(self.sourcecomments)
615 lines.extend(self.typecomments)
616 add_prev_msgid_info(lines)
617 if self.msgctxt:
618 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
619 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
620 if self.msgid_plural or self.msgid_pluralcomments:
621 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
622 lines.append(self._getmsgpartstr("msgstr", self.msgstr))
623 lines = [self._encodeifneccessary(line) for line in lines]
624 postr = "".join(lines)
625 return postr
626
628 """Get a list of locations from sourcecomments in the PO unit
629
630 rtype: List
631 return: A list of the locations with '#: ' stripped
632
633 """
634 locations = []
635 for sourcecomment in self.sourcecomments:
636 locations += quote.rstripeol(sourcecomment)[3:].split()
637 return locations
638
640 """Add a location to sourcecomments in the PO unit
641
642 @param location: Text location e.g. 'file.c:23' does not include #:
643 @type location: String
644
645 """
646 self.sourcecomments.append("#: %s\n" % location)
647
658
664
665 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
666
def getcontext(self):
    """Get the message context.

    @return: the unquoted msgctxt followed by whatever
        _extract_msgidcomments() returns
    """
    msgctxt_text = unquotefrompo(self.msgctxt)
    msgid_comment = self._extract_msgidcomments()
    return msgctxt_text + msgid_comment
670
def getid(self):
    """Returns a unique identifier for this unit.

    @return: the source text; prefixed with "_: <context>" and a newline
        when the unit has msgidcomments, or otherwise with the context and
        the \\x04 separator when the context is non-empty
    """
    context = self.getcontext()
    source_text = self.source
    if self.msgidcomments:
        return u"_: %s\n%s" % (context, source_text)
    if context:
        return u"%s\04%s" % (context, source_text)
    return source_text
685
686 -class pofile(pocommon.pofile):
687 """A .po file containing various units"""
688 UnitClass = pounit
689
691 """Parses the given file or file source string."""
692 try:
693 if hasattr(input, 'name'):
694 self.filename = input.name
695 elif not getattr(self, 'filename', ''):
696 self.filename = ''
697 if isinstance(input, str):
698 input = cStringIO.StringIO(input)
699
700 self.units = []
701 poparser.parse_units(poparser.ParseState(input, pounit), self)
702 except Exception, e:
703 raise base.ParseError(e)
704
# NOTE(review): the def line is missing from the listing this was taken from;
# the signature (including the "merge" default) follows the toolkit's store
# API - confirm against the file.
def removeduplicates(self, duplicatestyle="merge"):
    """Make sure each msgid is unique; merge comments etc from duplicates into original.

    @param duplicatestyle: "merge" merges a duplicate into the first unit
        with the same id; "msgctxt" keeps every unit and tells duplicates
        apart by appending their locations as a msgctxt line.  With any
        other value duplicates are dropped.
    """
    id_dict = {}
    uniqueunits = []
    # Units that have already been given a distinguishing marker.
    markedpos = []

    def addcomment(thepo):
        # Distinguish the unit with a "_: <locations>" msgid comment.
        thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
        markedpos.append(thepo)

    def addcontext(thepo):
        # Distinguish the unit with a msgctxt line built from its locations.
        thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))

    for thepo in self.units:
        unit_id = thepo.getid()
        if thepo.isheader() and not thepo.getlocations():
            # A header without locations is always kept as it is.
            uniqueunits.append(thepo)
        elif unit_id in id_dict:
            if duplicatestyle == "merge":
                if unit_id:
                    id_dict[unit_id].merge(thepo)
                else:
                    addcomment(thepo)
                    uniqueunits.append(thepo)
            elif duplicatestyle == "msgctxt":
                origpo = id_dict[unit_id]
                if origpo not in markedpos:
                    addcontext(origpo)
                    # Record the original itself, so that a third or later
                    # duplicate does not add another msgctxt line to it.
                    markedpos.append(origpo)
                addcontext(thepo)
                uniqueunits.append(thepo)
        else:
            if not unit_id:
                # A unit with an empty id is marked even on first sight.
                if duplicatestyle == "merge":
                    addcomment(thepo)
                else:
                    addcontext(thepo)
            id_dict[unit_id] = thepo
            uniqueunits.append(thepo)
    self.units = uniqueunits
745
747 """Convert to a string. double check that unicode is handled somehow here"""
748 output = self._getoutput()
749 if isinstance(output, unicode):
750 return output.encode(getattr(self, "encoding", "UTF-8"))
751 return output
752
754 """convert the units back to lines"""
755 lines = []
756 for unit in self.units:
757 unitsrc = str(unit) + "\n"
758 lines.append(unitsrc)
759 lines = "".join(self.encode(lines)).rstrip()
760
761 if lines:
762 lines += "\n"
763 return lines
764
766 """encode any unicode strings in lines in self._encoding"""
767 newlines = []
768 encoding = self._encoding
769 if encoding is None or encoding.lower() == "charset":
770 encoding = 'UTF-8'
771 for line in lines:
772 if isinstance(line, unicode):
773 line = line.encode(encoding)
774 newlines.append(line)
775 return newlines
776
778 """decode any non-unicode strings in lines with self._encoding"""
779 newlines = []
780 for line in lines:
781 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
782 try:
783 line = line.decode(self._encoding)
784 except UnicodeError, e:
785 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
786 newlines.append(line)
787 return newlines
788
790 for unit in self.units:
791 if not (unit.isheader() or unit.isobsolete()):
792 yield unit
793