Package translate :: Package tools :: Module posegment
[hide private]
[frames | no frames]

Source Code for Module translate.tools.posegment

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Segment Gettext PO, XLIFF and TMX localization files at the sentence level. 
 22   
 23  See: http://translate.sourceforge.net/wiki/toolkit/posegment for examples and 
 24  usage instructions 
 25  """ 
 26   
 27  from translate.storage import factory 
 28  from translate.lang import factory as lang_factory 
 29   
 30   
class segment:
    """Split translation units into one unit per sentence.

    ``sourcelang`` and ``targetlang`` are language objects that provide a
    ``sentences(text, strip=...)`` method; ``stripspaces`` is forwarded to
    that method as its ``strip`` argument.
    """

    def __init__(self, sourcelang, targetlang, stripspaces=True):
        """Store the language objects and the whitespace-stripping flag."""
        self.sourcelang = sourcelang
        self.targetlang = targetlang
        self.stripspaces = stripspaces

    def segmentunit(self, unit):
        """Return the list of units that results from segmenting ``unit``.

        Header and plural units are returned unchanged as ``[unit]``.  The
        same happens for a translated unit whose source and target split into
        a different number of sentences, since the sentences cannot be paired
        up.  Otherwise one copy of ``unit`` is produced per source sentence;
        its target is the matching target sentence when ``unit`` is
        translated, and the empty string when it is not.  If the source
        yields no sentences the returned list is empty.
        """
        if unit.isheader() or unit.hasplural():
            return [unit]

        sourcesegments = self.sourcelang.sentences(unit.source, strip=self.stripspaces)
        targetsegments = self.targetlang.sentences(unit.target, strip=self.stripspaces)

        # The translation state of ``unit`` does not change while we build the
        # copies, so query it once instead of once per sentence.
        translated = unit.istranslated()
        if translated and len(sourcesegments) != len(targetsegments):
            return [unit]
        # We could do more here to check if the lengths correspond more or less,
        # certain quality checks are passed, etc. But for now this is a good
        # start.

        if translated:
            # Lengths are known to be equal here, so zip() drops nothing.
            pairs = zip(sourcesegments, targetsegments)
        else:
            pairs = [(sentence, "") for sentence in sourcesegments]

        units = []
        for sourcesentence, targetsentence in pairs:
            newunit = unit.copy()
            newunit.source = sourcesentence
            newunit.target = targetsentence
            units.append(newunit)
        return units

    def convertstore(self, fromstore):
        """Return a new store of the same type holding the segmented units.

        The new store is created by calling ``type(fromstore)()`` with no
        arguments; every unit of ``fromstore`` is segmented in order and the
        resulting units are added with ``addunit``.
        """
        tostore = type(fromstore)()
        for unit in fromstore.units:
            for newunit in self.segmentunit(unit):
                tostore.addunit(newunit)
        return tostore
66 67
def segmentfile(inputfile, outputfile, templatefile, sourcelanguage="en", targetlanguage=None, stripspaces=True):
    """Segment the store loaded from inputfile and write it to outputfile.

    Returns 0 without writing anything when the input store is empty, and 1
    once the segmented store has been written.
    """
    # templatefile is never read here; the converter framework requires the
    # parameter to be present.
    store = factory.getobject(inputfile)
    if store.isempty():
        return 0

    segmenter = segment(
        lang_factory.getlanguage(sourcelanguage),
        lang_factory.getlanguage(targetlanguage),
        stripspaces=stripspaces
    )
    segmented = segmenter.convertstore(store)
    outputfile.write(str(segmented))
    return 1
80 81
def main():
    """Build the command-line option parser for posegment and run it.

    Registers ``segmentfile`` as the converter for the po, xlf and tmx
    formats and adds the language and whitespace options, each of which is
    also appended to the parser's passthrough list.
    """
    from translate.convert import convert
    formats = {
        "po": ("po", segmentfile),
        "xlf": ("xlf", segmentfile),
        "tmx": ("tmx", segmentfile),
    }
    parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
    parser.add_option("-l", "--language", dest="targetlanguage", default=None,
                      help="the target language code", metavar="LANG")
    # The help text promises a default of 'en', so register that value here.
    # With default=None the passthrough option would presumably reach
    # segmentfile as None and mask its own "en" default -- NOTE(review):
    # confirm how translate.convert forwards passthrough options.
    parser.add_option("", "--source-language", dest="sourcelanguage", default="en",
                      help="the source language code (default 'en')", metavar="LANG")
    parser.passthrough.append("sourcelanguage")
    parser.passthrough.append("targetlanguage")
    parser.add_option("", "--keepspaces", dest="stripspaces", action="store_false",
                      default=True, help="Disable automatic stripping of whitespace")
    parser.passthrough.append("stripspaces")
    parser.run()
# Script entry point: call main() only when this module is run directly.
if __name__ == '__main__':
    main()