LCOV - code coverage report
Current view: top level - source/third_party/tinygettext/src - po_parser.cpp (source / functions) Hit Total Coverage
Test: 0 A.D. test coverage report Lines: 1 205 0.5 %
Date: 2023-01-19 00:18:29 Functions: 2 15 13.3 %

          Line data    Source code
       1             : // tinygettext - A gettext replacement that works directly on .po files
       2             : // Copyright (c) 2009 Ingo Ruhnke <grumbel@gmail.com>
       3             : //
       4             : // This software is provided 'as-is', without any express or implied
       5             : // warranty. In no event will the authors be held liable for any damages
       6             : // arising from the use of this software.
       7             : //
       8             : // Permission is granted to anyone to use this software for any purpose,
       9             : // including commercial applications, and to alter it and redistribute it
      10             : // freely, subject to the following restrictions:
      11             : //
      12             : // 1. The origin of this software must not be misrepresented; you must not
      13             : //    claim that you wrote the original software. If you use this software
      14             : //    in a product, an acknowledgement in the product documentation would be
      15             : //    appreciated but is not required.
      16             : // 2. Altered source versions must be plainly marked as such, and must not be
      17             : //    misrepresented as being the original software.
      18             : // 3. This notice may not be removed or altered from any source distribution.
      19             : 
      20             : #include "precompiled.h"
      21             : 
      22             : #include "tinygettext/po_parser.hpp"
      23             : 
      24             : #include <iostream>
      25             : #include <ctype.h>
      26             : #include <string>
      27             : #include <istream>
      28             : #include <string.h>
      29             : #include <unordered_map>
      30             : #include <stdlib.h>
      31             : 
      32             : #include "tinygettext/language.hpp"
      33             : #include "tinygettext/log_stream.hpp"
      34             : #include "tinygettext/iconv.hpp"
      35             : #include "tinygettext/dictionary.hpp"
      36             : #include "tinygettext/plural_forms.hpp"
      37             : 
      38             : namespace tinygettext {
      39             : 
// Class-wide switch consulted by get_string(): while it is true the parser
// emits additional warnings about .po layout that deviates from the strict
// form (keyword and string not separated by exactly one space, whitespace in
// front of a continuation string).
bool POParser::pedantic = true;
      41             : 
      42             : void
      43           0 : POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict)
      44             : {
      45           0 :   POParser parser(filename, in, dict);
      46           0 :   parser.parse();
      47           0 : }
      48             : 
// Exception thrown by POParser::error() to abandon the entry currently being
// parsed; POParser::parse() catches it and carries on with the next entry.
class POParserError {};
      50             : 
      51           0 : POParser::POParser(const std::string& filename_, std::istream& in_, Dictionary& dict_, bool use_fuzzy_) :
      52             :   filename(filename_),
      53             :   in(in_),
      54             :   dict(dict_),
      55             :   use_fuzzy(use_fuzzy_),
      56             :   running(false),
      57             :   eof(false),
      58             :   big5(false),
      59             :   line_number(0),
      60             :   current_line(),
      61           0 :   conv()
      62             : {
      63           0 : }
      64             : 
      65           0 : POParser::~POParser()
      66             : {
      67           0 : }
      68             : 
      69             : void
      70           0 : POParser::warning(const std::string& msg)
      71             : {
      72           0 :   log_warning << filename << ":" << line_number << ": warning: " << msg << ": " << current_line << std::endl;
      73             :   //log_warning << "Line: " << current_line << std::endl;
      74           0 : }
      75             : 
      76             : void
      77           0 : POParser::error(const std::string& msg)
      78             : {
      79           0 :   log_error << filename << ":" << line_number << ": error: " << msg  << ": " << current_line << std::endl;
      80             : 
      81             :   // Try to recover from an error by searching for start of another entry
      82           0 :   do
      83           0 :     next_line();
      84           0 :   while(!eof && !is_empty_line());
      85             : 
      86           0 :   throw POParserError();
      87             : }
      88             : 
      89             : void
      90           0 : POParser::next_line()
      91             : {
      92           0 :   line_number += 1;
      93           0 :   if (!std::getline(in, current_line))
      94           0 :     eof = true;
      95           0 : }
      96             : 
      97             : void
      98           0 : POParser::get_string_line(std::ostringstream& out, size_t skip)
      99             : {
     100           0 :   if (skip+1 >= static_cast<unsigned int>(current_line.size()))
     101           0 :     error("unexpected end of line");
     102             : 
     103           0 :   if (current_line[skip] != '"')
     104           0 :     error("expected start of string '\"'");
     105             : 
     106             :   std::string::size_type i;
     107           0 :   for(i = skip+1; current_line[i] != '\"'; ++i)
     108             :   {
     109           0 :     if (big5 && static_cast<unsigned char>(current_line[i]) >= 0x81 && static_cast<unsigned char>(current_line[i]) <= 0xfe)
     110             :     {
     111           0 :       out << current_line[i];
     112             : 
     113           0 :       i += 1;
     114             : 
     115           0 :       if (i >= current_line.size())
     116           0 :         error("invalid big5 encoding");
     117             : 
     118           0 :       out << current_line[i];
     119             :     }
     120           0 :     else if (i >= current_line.size())
     121             :     {
     122           0 :       error("unexpected end of string");
     123             :     }
     124           0 :     else if (current_line[i] == '\\')
     125             :     {
     126           0 :       i += 1;
     127             : 
     128           0 :       if (i >= current_line.size())
     129           0 :         error("unexpected end of string in handling '\\'");
     130             : 
     131           0 :       switch (current_line[i])
     132             :       {
     133           0 :         case 'a':  out << '\a'; break;
     134           0 :         case 'b':  out << '\b'; break;
     135           0 :         case 'v':  out << '\v'; break;
     136           0 :         case 'n':  out << '\n'; break;
     137           0 :         case 't':  out << '\t'; break;
     138           0 :         case 'r':  out << '\r'; break;
     139           0 :         case '"':  out << '"'; break;
     140           0 :         case '\\': out << '\\'; break;
     141           0 :         default:
     142           0 :           std::ostringstream err;
     143           0 :           err << "unhandled escape '\\" << current_line[i] << "'";
     144           0 :           warning(err.str());
     145             : 
     146           0 :           out << current_line[i-1] << current_line[i];
     147           0 :           break;
     148             :       }
     149             :     }
     150             :     else
     151             :     {
     152           0 :       out << current_line[i];
     153             :     }
     154             :   }
     155             : 
     156             :   // process trailing garbage in line and warn if there is any
     157           0 :   for(i = i+1; i < current_line.size(); ++i)
     158           0 :     if (!isspace(current_line[i]))
     159             :     {
     160           0 :       warning("unexpected garbage after string ignoren");
     161           0 :       break;
     162             :     }
     163           0 : }
     164             : 
     165             : std::string
     166           0 : POParser::get_string(unsigned int skip)
     167             : {
     168           0 :   std::ostringstream out;
     169             : 
     170           0 :   if (skip+1 >= static_cast<unsigned int>(current_line.size()))
     171           0 :     error("unexpected end of line");
     172             : 
     173           0 :   if (current_line[skip] == ' ' && current_line[skip+1] == '"')
     174             :   {
     175           0 :     get_string_line(out, skip+1);
     176             :   }
     177             :   else
     178             :   {
     179           0 :     if (pedantic)
     180           0 :       warning("keyword and string must be seperated by a single space");
     181             : 
     182             :     for(;;)
     183             :     {
     184           0 :       if (skip >= static_cast<unsigned int>(current_line.size()))
     185           0 :         error("unexpected end of line");
     186           0 :       else if (current_line[skip] == '\"')
     187             :       {
     188           0 :         get_string_line(out, skip);
     189           0 :         break;
     190             :       }
     191           0 :       else if (!isspace(current_line[skip]))
     192             :       {
     193           0 :         error("string must start with '\"'");
     194             :       }
     195             :       else
     196             :       {
     197             :         // skip space
     198             :       }
     199             : 
     200           0 :       skip += 1;
     201             :     }
     202             :   }
     203             : 
     204           0 : next:
     205           0 :   next_line();
     206           0 :   for(std::string::size_type i = 0; i < current_line.size(); ++i)
     207             :   {
     208           0 :     if (current_line[i] == '"')
     209             :     {
     210           0 :       if (i == 1)
     211           0 :         if (pedantic)
     212           0 :           warning("leading whitespace before string");
     213             : 
     214           0 :       get_string_line(out,  i);
     215           0 :       goto next;
     216             :     }
     217           0 :     else if (isspace(current_line[i]))
     218             :     {
     219             :       // skip
     220             :     }
     221             :     else
     222             :     {
     223           0 :       break;
     224             :     }
     225             :   }
     226             : 
     227           0 :   return out.str();
     228             : }
     229             : 
     230           0 : static bool has_prefix(const std::string& lhs, const std::string& rhs)
     231             : {
     232           0 :   if (lhs.length() < rhs.length())
     233           0 :     return false;
     234             :   else
     235           0 :     return lhs.compare(0, rhs.length(), rhs) == 0;
     236             : }
     237             : 
     238             : void
     239           0 : POParser::parse_header(const std::string& header)
     240             : {
     241           0 :   std::string from_charset;
     242           0 :   std::string::size_type start = 0;
     243           0 :   for(std::string::size_type i = 0; i < header.length(); ++i)
     244             :   {
     245           0 :     if (header[i] == '\n')
     246             :     {
     247           0 :       std::string line = header.substr(start, i - start);
     248             : 
     249           0 :       if (has_prefix(line, "Content-Type:"))
     250             :       {
     251             :         // from_charset = line.substr(len);
     252           0 :         size_t len = strlen("Content-Type: text/plain; charset=");
     253           0 :         if (line.compare(0, len, "Content-Type: text/plain; charset=") == 0)
     254             :         {
     255           0 :           from_charset = line.substr(len);
     256             : 
     257           0 :           for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch)
     258           0 :             *ch = static_cast<char>(toupper(*ch));
     259             :         }
     260             :         else
     261             :         {
     262           0 :           warning("malformed Content-Type header");
     263             :         }
     264             :       }
     265           0 :       else if (has_prefix(line, "Plural-Forms:"))
     266             :       {
     267           0 :         PluralForms plural_forms = PluralForms::from_string(line);
     268           0 :         if (!plural_forms)
     269             :         {
     270           0 :           warning("unknown Plural-Forms given");
     271             :         }
     272             :         else
     273             :         {
     274           0 :           if (!dict.get_plural_forms())
     275             :           {
     276           0 :             dict.set_plural_forms(plural_forms);
     277             :           }
     278             :           else
     279             :           {
     280           0 :             if (dict.get_plural_forms() != plural_forms)
     281             :             {
     282           0 :               warning("Plural-Forms missmatch between .po file and dictionary");
     283             :             }
     284             :           }
     285             :         }
     286             :       }
     287           0 :       start = i+1;
     288             :     }
     289             :   }
     290             : 
     291           0 :   if (from_charset.empty() || from_charset == "CHARSET")
     292             :   {
     293           0 :     warning("charset not specified for .po, fallback to utf-8");
     294           0 :     from_charset = "UTF-8";
     295             :   }
     296           0 :   else if (from_charset == "BIG5")
     297             :   {
     298           0 :     big5 = true;
     299             :   }
     300             : 
     301           0 :   conv.set_charsets(from_charset, dict.get_charset());
     302           0 : }
     303             : 
     304             : bool
     305           0 : POParser::is_empty_line()
     306             : {
     307           0 :   if (current_line.empty())
     308             :   {
     309           0 :     return true;
     310             :   }
     311           0 :   else if (current_line[0] == '#')
     312             :   { // handle comments as empty lines
     313           0 :     return (current_line.size() == 1 || (current_line.size() >= 2 && isspace(current_line[1])));
     314             :   }
     315             :   else
     316             :   {
     317           0 :     for(std::string::iterator i = current_line.begin(); i != current_line.end(); ++i)
     318             :     {
     319           0 :       if (!isspace(*i))
     320           0 :         return false;
     321             :     }
     322             :   }
     323           0 :   return true;
     324             : }
     325             : 
     326             : bool
     327           0 : POParser::prefix(const char* prefix_str)
     328             : {
     329           0 :   return current_line.compare(0, strlen(prefix_str), prefix_str) == 0;
     330             : }
     331             : 
     332             : void
     333           0 : POParser::parse()
     334             : {
     335           0 :   next_line();
     336             : 
     337             :   // skip UTF-8 intro that some text editors produce
     338             :   // see http://en.wikipedia.org/wiki/Byte-order_mark
     339           0 :   if (current_line.size() >= 3 &&
     340           0 :       current_line[0] == static_cast<char>(0xef) &&
     341           0 :       current_line[1] == static_cast<char>(0xbb) &&
     342           0 :       current_line[2] == static_cast<char>(0xbf))
     343             :   {
     344           0 :     current_line = current_line.substr(3);
     345             :   }
     346             : 
     347             :   // Parser structure
     348           0 :   while(!eof)
     349             :   {
     350             :     try
     351             :     {
     352           0 :       bool fuzzy =  false;
     353           0 :       bool has_msgctxt = false;
     354           0 :       std::string msgctxt;
     355           0 :       std::string msgid;
     356             : 
     357           0 :       while(prefix("#"))
     358             :       {
     359           0 :         if (current_line.size() >= 2 && current_line[1] == ',')
     360             :         {
     361             :           // FIXME: Rather simplistic hunt for fuzzy flag
     362           0 :           if (current_line.find("fuzzy", 2) != std::string::npos)
     363           0 :             fuzzy = true;
     364             :         }
     365             : 
     366           0 :         next_line();
     367             :       }
     368             : 
     369           0 :       if (!is_empty_line())
     370             :       {
     371           0 :         if (prefix("msgctxt"))
     372             :         {
     373           0 :           has_msgctxt = true;
     374           0 :           msgctxt = get_string(7);
     375             :         }
     376             : 
     377           0 :         if (prefix("msgid"))
     378           0 :           msgid = get_string(5);
     379             :         else
     380           0 :           error("expected 'msgid'");
     381             : 
     382           0 :         if (prefix("msgid_plural"))
     383             :         {
     384           0 :           std::string msgid_plural = get_string(12);
     385           0 :           std::vector<std::string> msgstr_num;
     386           0 :       bool saw_nonempty_msgstr = false;
     387             : 
     388           0 :         next:
     389           0 :           if (is_empty_line())
     390             :           {
     391           0 :             if (msgstr_num.empty())
     392           0 :               error("expected 'msgstr[N] (0 <= N <= 9)'");
     393             :           }
     394           0 :           else if (prefix("msgstr[") &&
     395           0 :                    current_line.size() > 8 &&
     396           0 :                    isdigit(current_line[7]) && current_line[8] == ']')
     397             :           {
     398           0 :             unsigned int number = static_cast<unsigned int>(current_line[7] - '0');
     399           0 :         std::string msgstr = get_string(9);
     400             : 
     401           0 :         if(!msgstr.empty())
     402           0 :           saw_nonempty_msgstr = true;
     403             : 
     404           0 :             if (number >= msgstr_num.size())
     405           0 :               msgstr_num.resize(number+1);
     406             : 
     407           0 :             msgstr_num[number] = conv.convert(msgstr);
     408           0 :             goto next;
     409             :           }
     410             :           else
     411             :           {
     412           0 :             error("expected 'msgstr[N]'");
     413             :           }
     414             : 
     415           0 :           if (!is_empty_line())
     416           0 :             error("expected 'msgstr[N]' or empty line");
     417             : 
     418           0 :       if (saw_nonempty_msgstr)
     419             :       {
     420           0 :         if (use_fuzzy || !fuzzy)
     421             :             {
     422           0 :           if (!dict.get_plural_forms())
     423             :           {
     424           0 :         warning("msgstr[N] seen, but no Plural-Forms given");
     425             :           }
     426             :           else
     427             :           {
     428           0 :         if (msgstr_num.size() != dict.get_plural_forms().get_nplural())
     429             :         {
     430           0 :           warning("msgstr[N] count doesn't match Plural-Forms.nplural");
     431             :         }
     432             :           }
     433             : 
     434           0 :           if (has_msgctxt)
     435           0 :         dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num);
     436             :           else
     437           0 :         dict.add_translation(msgid, msgid_plural, msgstr_num);
     438             :         }
     439             : 
     440             :         if ((false))
     441             :         {
     442             :           std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
     443             :           std::cout << "msgid \"" << msgid << "\"" << std::endl;
     444             :           std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl;
     445             :           for(std::vector<std::string>::size_type i = 0; i < msgstr_num.size(); ++i)
     446             :         std::cout << "msgstr[" << i << "] \"" << conv.convert(msgstr_num[i]) << "\"" << std::endl;
     447             :           std::cout << std::endl;
     448             :         }
     449             :       }
     450             :         }
     451           0 :         else if (prefix("msgstr"))
     452             :         {
     453           0 :           std::string msgstr = get_string(6);
     454             : 
     455           0 :           if (msgid.empty())
     456             :           {
     457           0 :             parse_header(msgstr);
     458             :           }
     459           0 :           else if(!msgstr.empty())
     460             :           {
     461           0 :             if (use_fuzzy || !fuzzy)
     462             :             {
     463           0 :               if (has_msgctxt)
     464           0 :                 dict.add_translation(msgctxt, msgid, conv.convert(msgstr));
     465             :               else
     466           0 :                 dict.add_translation(msgid, conv.convert(msgstr));
     467             :             }
     468             : 
     469             :             if ((false))
     470             :             {
     471             :               std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
     472             :               std::cout << "msgid \"" << msgid << "\"" << std::endl;
     473             :               std::cout << "msgstr \"" << conv.convert(msgstr) << "\"" << std::endl;
     474             :               std::cout << std::endl;
     475             :             }
     476             :           }
     477             :         }
     478             :         else
     479             :         {
     480           0 :           error("expected 'msgstr' or 'msgid_plural'");
     481             :         }
     482             :       }
     483             : 
     484           0 :       if (!is_empty_line())
     485           0 :         error("expected empty line");
     486             : 
     487           0 :       next_line();
     488             :     }
     489           0 :     catch(POParserError&)
     490             :     {
     491             :     }
     492             :   }
     493           0 : }
     494             : 
     495           3 : } // namespace tinygettext
     496             : 
     497             : /* EOF */

Generated by: LCOV version 1.13