BibleTime
thmltohtml.cpp
Go to the documentation of this file.
1 /*********
2 *
3 * In the name of the Father, and of the Son, and of the Holy Spirit.
4 *
5 * This file is part of BibleTime's source code, https://bibletime.info/
6 *
7 * Copyright 1999-2021 by the BibleTime developers.
8 * The BibleTime source code is licensed under the GNU General Public License
9 * version 2.0.
10 *
11 **********/
12 
13 #include "thmltohtml.h"
14 
15 #include <QRegularExpression>
16 #include <QRegularExpressionMatch>
17 #include <QUrl>
18 #include <utility>
19 #include "../../util/btassert.h"
20 #include "../config/btconfig.h"
21 #include "../drivers/cswordmoduleinfo.h"
22 #include "../managers/cswordbackend.h"
23 #include "../managers/referencemanager.h"
24 
25 // Sword includes:
26 #pragma GCC diagnostic push
27 #pragma GCC diagnostic ignored "-Wextra-semi"
28 #pragma GCC diagnostic ignored "-Wsuggest-override"
29 #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
30 #ifdef __clang__
31 #pragma clang diagnostic push
32 #pragma clang diagnostic ignored "-Wsuggest-destructor-override"
33 #endif
34 #include <swmodule.h>
35 #include <utilstr.h>
36 #include <utilxml.h>
37 #include <versekey.h>
38 #ifdef __clang__
39 #pragma clang diagnostic pop
40 #endif
41 #pragma GCC diagnostic pop
42 
43 
44 namespace Filters {
45 
// NOTE(review): the line that opens this definition is missing from this
// listing (the embedded numbering jumps from 45 to 47). Presumably this is the
// body of the ThmlToHtml constructor -- confirm against the real source file.
//
// The visible statements configure the filter: escape strings and tokens are
// matched case-sensitively, tokens are delimited by "<" and ">", the token
// "/foreign" is substituted with "</span>", and the substitutions for "note"
// and "/note" are removed.
47  setEscapeStringCaseSensitive(true);
48  setPassThruUnknownEscapeString(true); // the HTML widget will render the HTML escape codes
49 
50  setTokenStart("<");
51  setTokenEnd(">");
52  setTokenCaseSensitive(true);
53 
54  addTokenSubstitute("/foreign", "</span>");
55 
    // handleToken() in this file has its own branch for "note" tags and only
    // reaches it when substituteToken() fails; presumably that is why these
    // substitutions are removed -- TODO confirm.
56  removeTokenSubstitute("note");
57  removeTokenSubstitute("/note");
58 }
59 
// Runs sword::ThMLHTML::processText() on buf and then rewrites the <sync> tags
// that remain in the result: the text is cut into pieces that each end with a
// run of <sync> tags, the first <sync> of a piece is replaced by "</span>" with
// a matching <span lemma="..."> inserted in front of it, and the values of the
// following <sync> tags of the piece are merged into that span as attributes.
// buf is only overwritten when at least one <sync> run was found.
//
// Always returns 1.
//
// NOTE(review): the line numbered 70 is missing from this listing (the
// embedded numbering jumps from 69 to 71), so the condition that starts on
// line 69 is unterminated here; consult the real source file for it.
// NOTE(review): `module` is dereferenced unconditionally on line 66, while the
// declaration quoted in the tooltip residue at the end of this listing gives
// it a default of nullptr -- confirm that callers always pass a module.
60 char ThmlToHtml::processText(sword::SWBuf &buf, const sword::SWKey *key,
61  const sword::SWModule *module)
62 {
63  sword::ThMLHTML::processText(buf, key, module);
64 
65  if (auto * const m =
66  CSwordBackend::instance().findModuleByName(module->getName()))
67  {
68  // only parse if the module has strongs or lemmas:
69  if (!m->has(CSwordModuleInfo::lemmas)
71  return 1;
72  }
73 
    // Cut the text into pieces; every piece except possibly the last one ends
    // with a run of one or more consecutive <sync> tags:
74  QStringList list;
75  {
76  auto t = QString::fromUtf8(buf.c_str());
77  {
    // One or more <sync> tags, each optionally preceded by one of ".,;" and
    // carrying two attributes named type or value:
78  static QRegularExpression const tag(
79  QStringLiteral(R"PCRE(([.,;]?<sync[^>]+(type|value)=)PCRE"
80  R"PCRE("([^"]+)"[^>]+(type|value)=)PCRE"
81  R"PCRE("([^"]+)"([^<]*)>)+)PCRE"));
82  QRegularExpressionMatch match;
83  auto pos = t.indexOf(tag, 0, &match);
84  if (pos == -1) // no strong or morph code found in this text
85  return 1; // WARNING: Return already here
86  do {
    // Move everything up to and including the matched run into the list:
87  auto const partLength = pos + match.capturedLength();
88  list.append(t.left(partLength));
89  t.remove(0, partLength);
90  pos = t.indexOf(tag, 0, &match);
91  } while (pos != -1);
92  }
93 
94  // Append the trailing text to the list:
95  if (!t.isEmpty())
96  list.append(std::move(t));
97  }
98 
    // A single <sync> tag with two attributes named type, value or class,
    // optionally followed by further such attributes. Capture groups 1/2 and
    // 3/4 hold the name/value of the first two pairs; group 5 wraps the
    // optional extra pair, whose name/value are groups 6/7:
99  static QRegularExpression const tag(
100  QStringLiteral(R"PCRE(<sync[^>]+(type|value|class)="([^"]+)"[^>]+)PCRE"
101  R"PCRE((type|value|class)="([^"]+)"[^>]+)PCRE"
102  R"PCRE(((type|value|class)="([^"]+)")*([^<]*)>)PCRE"));
103  QString result;
104  for (auto & e : list) {
105 
106  // pass text ahead of <sync> straight through
107  if (auto const pos = e.indexOf(tag); pos > 0) {
108  result.append(e.left(pos));
109  e.remove(0, pos);
110  }
111 
112  // parse <sync> and change to <span>
    // These two flags record which kind of attribute was written most recently:
113  bool hasLemmaAttr = false;
114  bool hasMorphAttr = false;
115 
116  QRegularExpressionMatch match;
117  auto pos = e.indexOf(tag, 0, &match);
    // Becomes true once the first <sync> of this piece was turned into a span:
118  bool insertedTag = false;
119 
120  while (pos != -1) {
121  bool isMorph = false;
122  bool isStrongs = false;
123  QString value;
124  QString valueClass;
125 
126  // check 3 attribute/value pairs
127 
    // i takes the values 1, 3 and 6, i.e. the capture groups that hold the
    // attribute names; group 5 is skipped because it wraps the third pair:
128  for (int i = 1; i < 6; i += 2) {
129  if (i > 4)
130  i++;
131 
132  auto const attrName = match.captured(i);
133  auto const attrValue = match.captured(i + 1);
134  if (attrName == QStringLiteral("type")) {
135  isMorph = (attrValue == QStringLiteral("morph"));
136  isStrongs = (attrValue == QStringLiteral("Strongs"));
137  } else if (attrName == QStringLiteral("value")) {
138  value = attrValue;
139  } else if (attrName == QStringLiteral("class")) {
140  valueClass = attrValue;
141  } else { // optional 3rd attribute pair is not present:
142  BT_ASSERT(attrName.isEmpty());
143  }
144  }
145 
146  // prepend the class qualifier to the value
147  if (!valueClass.isEmpty())
148  value = QStringLiteral("%1:%2").arg(valueClass, value);
149 
    // A <sync> without a value attribute ends the processing of this piece;
    // the remaining tags are left in place:
150  if (value.isEmpty()) {
151  break;
152  }
153 
154  // insert the span
155  if (!insertedTag) {
    // Replace the <sync> by the closing tag; 7 is the length of "</span>":
156  e.replace(pos, match.capturedLength(), QStringLiteral("</span>"));
157  pos += 7;
158 
    // NOTE(review): the attribute written here is always named "lemma", even
    // when this first tag is a morph code (isMorph) -- confirm this is intended.
159  auto rep = QStringLiteral("<span lemma=\"%1\">").arg(value);
    // Skip leading whitespace and punctuation so that the opening tag is
    // inserted directly in front of the first other character:
160  int startPos = 0;
161  QChar c = e[startPos];
162 
163  while ((startPos < pos) && (c.isSpace() || c.isPunct())) {
164  ++startPos;
165  c = e[startPos];
166  }
167 
168  hasLemmaAttr = isStrongs;
169  hasMorphAttr = isMorph;
170 
    // Keep pos pointing just behind the "</span>" after the insertion:
171  pos += rep.length();
172  e.insert(startPos, std::move(rep));
173  }
174  else { // add the attribute to the existing tag
175  e.remove(pos, match.capturedLength());
176 
177  if ((!isMorph && hasLemmaAttr) || (isMorph && hasMorphAttr)) { // we append another attribute value, e.g. 3000 gets 3000|5000
178  // search the existing attribute start
    // The pattern matches the attribute up to, but not including, its
    // closing quote, so the new value can be inserted right behind the match:
179  auto const & attrRegExp =
180  [isMorph]{
181  if (isMorph) {
182  static QRegularExpression const re(
183  QStringLiteral("morph=\".+?(?=\")"));
184  return re;
185  } else {
186  static QRegularExpression const re(
187  QStringLiteral("lemma=\".+?(?=\")"));
188  return re;
189  }
190  }();
    // NOTE(review): this local `match` shadows the outer one, and the search
    // starts at pos, i.e. behind the "</span>", whereas the opening span was
    // inserted at or before the position of that "</span>" -- confirm that
    // searching from pos (rather than from 0) is intended.
191  QRegularExpressionMatch match;
192  const int foundAttrPos = e.indexOf(attrRegExp, pos, &match);
193 
194  if (foundAttrPos != -1) {
195  e.insert(foundAttrPos + match.capturedLength(),
196  QStringLiteral("|%1").arg(value));
    // Account for the inserted "|" plus the value:
197  pos += value.length() + 1;
198 
199  hasLemmaAttr = !isMorph;
200  hasMorphAttr = isMorph;
201  }
202  }
203  else { // attribute was not yet inserted
    // Insert the new attribute in front of the first morph= or lemma=
    // attribute found in the piece:
204  static QRegularExpression const re(
205  QStringLiteral("morph=|lemma="));
206  const int attrPos = e.indexOf(re, 0);
207 
208  if (attrPos >= 0) {
209  hasMorphAttr = isMorph;
210  hasLemmaAttr = !isMorph;
211 
212  auto attr = QStringLiteral("%1=\"%2\" ")
213  .arg(isMorph
214  ? QStringLiteral("morph")
215  : QStringLiteral("lemma"),
216  value);
217  pos += attr.length();
218  e.insert(attrPos, std::move(attr)); /// \bug e.replace() instead?
219  }
220  }
221  }
222 
223  insertedTag = true;
    // Continue with the next <sync> of this piece, if any:
224  pos = e.indexOf(tag, pos, &match);
225  }
226 
227  result.append(std::move(e));
228  }
229 
230  if (!list.isEmpty())
231  buf = result.toUtf8();
232 
233  return 1;
234 }
235 
236 
// Translates one ThML token into HTML that is appended to buf.
//
// Nothing more is done when substituteToken() or substituteEscapeString()
// succeeds for the token. Otherwise the tag name selects the handling: foreign,
// sync, note, scripRef, div and img are handled here, and every other tag (or a
// token without a tag name) is forwarded to sword::ThMLHTML::handleToken().
//
// userData is expected to point to a UserData object (asserted below); its
// fields carry the state shared between the calls for the individual tokens.
//
// Returns true, except when the token is forwarded, in which case the result
// of sword::ThMLHTML::handleToken() is returned.
//
// NOTE(review): the lines numbered 299, 318, 328, 352, 357, 360 and 368 are
// missing from this listing (see the gaps in the embedded numbering), so
// several statements in the scripRef branch are incomplete here; consult the
// real source file for them.
237 bool ThmlToHtml::handleToken(sword::SWBuf &buf, const char *token,
238  sword::BasicFilterUserData *userData)
239 {
240  if (!substituteToken(buf, token) && !substituteEscapeString(buf, token)) {
241  sword::XMLTag const tag(token);
242  BT_ASSERT(dynamic_cast<UserData *>(userData));
243  UserData * const myUserData = static_cast<UserData *>(userData);
244  // Hack to be able to call stuff like Lang():
245  sword::SWModule const * const myModule =
246  const_cast<sword::SWModule *>(myUserData->module);
247  char const * const tagName = tag.getName();
248  if (!tagName) // unknown tag, pass through:
249  return sword::ThMLHTML::handleToken(buf, token, userData);
250  if (!sword::stricmp(tagName, "foreign")) {
251  // A text part in another language, we have to set the right font
252 
    // NOTE(review): without a lang attribute no opening span is written,
    // while the "/foreign" substitution registered on line 54 of this
    // listing still writes "</span>" -- confirm the markup stays balanced.
253  if (const char * const tagLang = tag.getAttribute("lang"))
254  buf.append("<span class=\"foreign\" lang=\"")
255  .append(tagLang)
256  .append("\">");
257  } else if (!sword::stricmp(tagName, "sync")) {
258  // If Morph or Strong or Lemma:
    // Such tags are written back unchanged (they are rewritten by
    // processText() in this file); <sync> tags of any other type, or
    // without a type attribute, produce no output.
259  if (const char * const tagType = tag.getAttribute("type"))
260  if (!sword::stricmp(tagType, "morph")
261  || !sword::stricmp(tagType, "Strongs")
262  || !sword::stricmp(tagType, "lemma"))
263  buf.append('<').append(token).append('>');
264  } else if (!sword::stricmp(tagName, "note")) { // <note> tag
265  if (!tag.isEmpty()) {
266  if (!tag.isEndTag()) {
    // Start of a footnote: write a "*" marker whose note attribute is
    // "<module name>/<key short text>/<footnote counter>".
267  buf.append(" <span class=\"footnote\" note=\"")
268  .append(myModule->getName())
269  .append('/')
270  .append(myUserData->key->getShortText())
271  .append('/')
272  .append(QString::number(myUserData->swordFootnote).toUtf8().constData())
273  .append("\">*</span> ");
274 
    // Count the footnote and keep its text out of the output until
    // the end tag is seen:
275  myUserData->swordFootnote++;
276  myUserData->suspendTextPassThru = true;
277  myUserData->inFootnoteTag = true;
    // NOTE(review): the isEndTag() test below is always true at this point,
    // because the branch above already handled !tag.isEndTag().
278  } else if (tag.isEndTag()) { // end tag
279  // buf += ")</span>";
280  myUserData->suspendTextPassThru = false;
281  myUserData->inFootnoteTag = false;
282  }
283  }
284  } else if (!sword::stricmp(tagName, "scripRef")) { // a scripRef
285  // scrip refs which are embedded in footnotes may not be displayed!
286 
287  if (!myUserData->inFootnoteTag) {
288  if (tag.isEndTag()) {
289  if (myUserData->inscriptRef) { // like "<scripRef passage="John 3:16">See John 3:16</scripRef>"
290  buf.append("</a></span>");
291 
292  myUserData->inscriptRef = false;
293  myUserData->suspendTextPassThru = false;
294  } else { // like "<scripRef>John 3:16</scripRef>"
295  if (CSwordModuleInfo const * const mod =
296  btConfig().getDefaultSwordModuleByType(
297  "standardBible"))
298  {
300  mod->name(),
301  // current module key:
302  QString::fromUtf8(myUserData->key->getText()),
303  myModule->getLanguage()};
304 
305  // it's ok to split the reference, because no descriptive text is given
306  bool insertSemicolon = false;
307  buf.append("<span class=\"crossreference\">");
    // The text between the start and the end tag, split at ';'
    // into the individual references:
308  QStringList const refs(
309  QString::fromUtf8(
310  myUserData->lastTextNode.c_str()).split(
311  ';'));
312  QString oldRef; // the previous reference to use as a base for the next refs
313  for (auto const & ref : refs) {
314  if (!oldRef.isEmpty())
315  options.refBase = oldRef; // Use the last ref as a base, e.g. Rom 1,2-3, when the next ref is only 3:3-10
316 
317  // Use the parsed result as the base for the next ref:
319  ref,
320  options);
321 
322  // Prepend a ref divider if we're after the first one
323  if (insertSemicolon)
324  buf.append("; ");
325 
326  buf.append("<a href=\"")
327  .append(
329  *mod,
330  oldRef
331  ).toUtf8().constData()
332  )
333  .append("\" crossrefs=\"")
334  .append(oldRef.toUtf8().constData())
335  .append("\">")
336  .append(ref.toUtf8().constData())
337  .append("</a>");
338  insertSemicolon = true;
339  }
340  buf.append("</span>"); // crossref end
341  }
342  myUserData->suspendTextPassThru = false;
343  }
344  } else if (tag.getAttribute("passage") ) {
345  // The passage was given as a parameter value
346  myUserData->inscriptRef = true;
347  myUserData->suspendTextPassThru = false;
348 
349  auto * mod = btConfig().getDefaultSwordModuleByType(
350  QStringLiteral("standardBible"));
351  if (! mod)
353 
354  if (mod) {
355  BT_ASSERT(tag.getAttribute("passage"));
356  QString const completeRef(
358  QString::fromUtf8(
359  tag.getAttribute("passage")),
361  mod->name(),
362  QString::fromUtf8(
363  myUserData->key->getText()),
364  myModule->getLanguage()}));
    // Only the opening tags are written here; the matching
    // "</a></span>" is written when the end tag arrives:
365  buf.append("<span class=\"crossreference\">")
366  .append("<a href=\"")
367  .append(
369  *mod,
370  completeRef
371  ).toUtf8().constData()
372  )
373  .append("\" crossrefs=\"")
374  .append(completeRef.toUtf8().constData())
375  .append("\">");
376  } else {
    // No module available: write plain opening tags so that the
    // "</a></span>" written for the end tag still has counterparts:
377  buf.append("<span><a>");
378  }
379  } else { // We're starting a scripRef like "<scripRef>John 3:16</scripRef>"
380  myUserData->inscriptRef = false;
381  /* Let's stop text from going to output, the text gets
382  added by the end-tag branch above: */
383  myUserData->suspendTextPassThru = true;
384  }
385  }
386  } else if (!sword::stricmp(tagName, "div")) {
    // Only the classes "sechead" and "title" produce an opening <div>;
    // every </div> end tag is written regardless:
387  if (tag.isEndTag()) {
388  buf.append("</div>");
389  } else if (char const * const tagClass = tag.getAttribute("class")){
390  if (!sword::stricmp(tagClass, "sechead") ) {
391  buf.append("<div class=\"sectiontitle\">");
392  } else if (!sword::stricmp(tagClass, "title")) {
393  buf.append("<div class=\"booktitle\">");
394  }
395  }
396  } else if (!sword::stricmp(tagName, "img") && tag.getAttribute("src")) {
397  const char * value = tag.getAttribute("src");
398 
399  if (value[0] == '/')
400  value++; // strip the first /
401 
    // Look up the module's "AbsoluteDataPath" config entry only once and
    // keep it in the user data for the following images:
402  if (!myUserData->absolutePath.has_value()) {
403  auto const * const absoluteDataPath =
404  myUserData->module->getConfigEntry("AbsoluteDataPath");
405  myUserData->absolutePath.emplace(
406  myUserData->module->isUnicode()
407  ? QString::fromUtf8(absoluteDataPath)
408  : QString::fromLatin1(absoluteDataPath));
409  }
410 
    // Write the image as a local-file URL of "<absolute path>/<src>":
411  buf.append("<img src=\"")
412  .append(
413  QUrl::fromLocalFile(
414  QStringLiteral("%1/%2").arg(
415  *myUserData->absolutePath,
416  QString::fromUtf8(value))
417  ).toString().toUtf8().constData())
418  .append("\" />");
419  } else { // Let unknown token pass thru:
420  return sword::ThMLHTML::handleToken(buf, token, userData);
421  }
422  }
423  return true;
424 }
425 
426 } // namespace Filters
#define BT_ASSERT(...)
Definition: btassert.h:17
BtConfig & btConfig()
This is a shorthand for BtConfig::getInstance().
Definition: btconfig.h:305
CSwordModuleInfo * getDefaultSwordModuleByType(const QString &moduleType)
Returns default sword module info class for a given module type.
Definition: btconfig.cpp:503
CSwordModuleInfo * findFirstAvailableModule(CSwordModuleInfo::ModuleType type)
static CSwordBackend & instance() noexcept
Definition: cswordbackend.h:98
static FilterOption const strongNumbers
static FilterOption const lemmas
std::optional< QString > absolutePath
Definition: thmltohtml.h:50
unsigned short int swordFootnote
Definition: thmltohtml.h:53
bool handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) override
Definition: thmltohtml.cpp:237
char processText(sword::SWBuf &buf, const sword::SWKey *key, const sword::SWModule *module=nullptr) override
Definition: thmltohtml.cpp:60
QString parseVerseReference(QString const &ref, ParseOptions const &options)
QString encodeHyperlink(CSwordModuleInfo const &module, QString const &key)