BibleTime
osistohtml.cpp
Go to the documentation of this file.
1 /*********
2 *
3 * In the name of the Father, and of the Son, and of the Holy Spirit.
4 *
5 * This file is part of BibleTime's source code, https://bibletime.info/
6 *
7 * Copyright 1999-2021 by the BibleTime developers.
8 * The BibleTime source code is licensed under the GNU General Public License
9 * version 2.0.
10 *
11 **********/
12 
13 #include "osistohtml.h"
14 
15 #include <QString>
16 #include <string_view>
17 #include "../config/btconfig.h"
18 #include "../drivers/cswordmoduleinfo.h"
19 #include "../managers/cswordbackend.h"
20 #include "../managers/referencemanager.h"
21 
22 // Sword includes:
23 #pragma GCC diagnostic push
24 #pragma GCC diagnostic ignored "-Wsuggest-override"
25 #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
26 #include <swbuf.h>
27 #include <swmodule.h>
28 #include <utilxml.h>
29 #pragma GCC diagnostic pop
30 
31 
32 namespace {
33 
34 template <typename UserData>
35 void renderReference(char const * const osisRef,
36  sword::SWBuf & buf,
37  sword::SWModule const & myModule,
38  UserData const & myUserData)
39 {
40  QString const ref(osisRef);
41  //BT_ASSERT(!ref.isEmpty()); checked later
42 
43  if (!ref.isEmpty()) {
44  /* find out the mod, using the current module makes sense if it's a
45  bible or commentary because the refs link into a bible by default.
46  If the osisRef is something like "ModuleID:key comes here" then the
47  modulename is given, so we'll use that one. */
48 
49  auto const * mod =
51  //BT_ASSERT(mod); checked later
52  if (!mod || (mod->type() != CSwordModuleInfo::Bible
53  && mod->type() != CSwordModuleInfo::Commentary))
54  {
56  QStringLiteral("standardBible"));
57  if (!mod)
60  }
61 
62  // BT_ASSERT(mod); There's no necessarily a module or standard Bible
63 
64  //if the osisRef like "GerLut:key" contains a module, use that
65  auto const pos = ref.indexOf(':');
66 
67  QString hrefRef;
68  if ((pos >= 0)
69  && ref.at(pos - 1).isLetter()
70  && ref.at(pos + 1).isLetter())
71  {
72  auto const newModuleName(ref.left(pos));
73  hrefRef = ref.mid(pos + 1);
74 
75  if (auto const * const moduleByName =
76  CSwordBackend::instance().findModuleByName(newModuleName))
77  mod = moduleByName;
78  } else {
79  hrefRef = ref;
80  }
81 
82  if (mod) {
83  using namespace ReferenceManager;
84  ParseOptions const options{
85  mod->name(),
86  QString::fromUtf8(myUserData.key->getText()),
87  myModule.getLanguage()};
88 
89  auto const hyperlink( // Hyperlink with key and mod
91  *mod,
92  parseVerseReference(hrefRef, options)).toUtf8());
93 
94  // Ref must contain the osisRef module marker if there was any:
95  auto const moduleMarker(parseVerseReference(ref, options).toUtf8());
96 
97  buf.append("<a href=\"")
98  .append(hyperlink.constData())
99  .append("\" crossrefs=\"")
100  .append(moduleMarker.constData())
101  .append("\">");
102  }
103  /** \todo Should we add something if there were no referenced module
104  available? */
105  }
106 } // renderReference()
107 
108 } // anonymous namespace
109 
111  setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes
112 
113  addTokenSubstitute("inscription", "<span class=\"inscription\">");
114  addTokenSubstitute("/inscription", "</span>");
115 
116  addTokenSubstitute("mentioned", "<span class=\"mentioned\">");
117  addTokenSubstitute("/mentioned", "</span>");
118 
119 // addTokenSubstitute("divineName", "<span class=\"name\"><span class=\"divine\">");
120 // addTokenSubstitute("/divineName", "</span></span>");
121 
122  /// \todo Move that down to the real tag handling, segs without the type morph would generate incorrect markup, as the end span is always inserted
123 // addTokenSubstitute("seg type=\"morph\"", "<span class=\"morphSegmentation\">");
124 // addTokenSubstitute("/seg", "</span>");
125 
126  // OSIS tables
127  addTokenSubstitute("table", "<table>");
128  addTokenSubstitute("/table", "</table>");
129  addTokenSubstitute("row", "<tr>");
130  addTokenSubstitute("/row", "</tr>");
131  addTokenSubstitute("cell", "<td>");
132  addTokenSubstitute("/cell", "</td>");
133 
134 }
135 
136 bool Filters::OsisToHtml::handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) {
137  using namespace std::literals;
138  // manually process if it wasn't a simple substitution
139 
140  if (!substituteToken(buf, token)) {
141  UserData* myUserData = dynamic_cast<UserData*>(userData);
142  sword::SWModule* myModule = const_cast<sword::SWModule*>(myUserData->module); //hack
143 
144  sword::XMLTag const tag(token);
145  // qWarning("found %s", token);
146  auto const osisQToTickEntry =
147  userData->module->getConfigEntry("OSISqToTick");
148  bool const osisQToTick =
149  !osisQToTickEntry || osisQToTickEntry != "false"sv;
150 
151  std::string_view const tagName(tag.getName());
152 
153  if (tagName == "div"sv) {
154  if (tag.isEndTag()) {
155  buf.append("</div>");
156  } else {
157  sword::SWBuf type( tag.getAttribute("type") );
158  if (type == "introduction") {
159  if (!tag.isEmpty())
160  buf.append("<div class=\"introduction\">");
161  } else if (type == "chapter") {
162  if (!tag.isEmpty())
163  buf.append("<div class=\"chapter\" ></div>"); //don't open a div here, that would lead to a broken XML structure
164  } else if (type == "x-p") {
165  buf.append("<br/>");
166  } else if (type == "paragraph") {
167  if (tag.getAttribute("sID"))
168  buf.append("<p>");
169  else if (tag.getAttribute("eID"))
170  buf.append("</p>");
171  } else {
172  buf.append("<div>");
173  }
174  }
175  }
176  else if (tagName == "w"sv) {
177  if ((!tag.isEmpty()) && (!tag.isEndTag())) { //start tag
178  const char *attrib;
179  const char *val;
180 
181  sword::XMLTag outTag("span");
182  sword::SWBuf attrValue;
183 
184  if ((attrib = tag.getAttribute("xlit"))) {
185  val = strchr(attrib, ':');
186  val = (val) ? (val + 1) : attrib;
187  outTag.setAttribute("xlit", val);
188  }
189 
190  if ((attrib = tag.getAttribute("gloss"))) {
191  val = strchr(attrib, ':');
192  val = (val) ? (val + 1) : attrib;
193  outTag.setAttribute("gloss", val);
194  }
195 
196  if ((attrib = tag.getAttribute("lemma"))) {
197  char splitChar = '|';
198  const int countSplit1 = tag.getAttributePartCount("lemma", '|');
199  const int countSplit2 = tag.getAttributePartCount("lemma", ' '); /// \todo not allowed, remove soon
200  int count = 0;
201 
202  if (countSplit1 > countSplit2) { //| split char
203  splitChar = '|'; /// \todo not allowed, remove soon
204  count = countSplit1;
205  }
206  else {
207  splitChar = ' ';
208  count = countSplit2;
209  }
210 
211  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
212  attrValue = "";
213 
214  do {
215  if (attrValue.length()) {
216  attrValue.append( '|' );
217  }
218 
219  attrib = tag.getAttribute("lemma", i, splitChar);
220 
221  if (i < 0) { // to handle our -1 condition
222  i = 0;
223  }
224 
225  val = strchr(attrib, ':');
226  val = (val) ? (val + 1) : attrib;
227 
228  attrValue.append(val);
229  }
230  while (++i < count);
231 
232  if (attrValue.length()) {
233  outTag.setAttribute("lemma", attrValue.c_str());
234  }
235  }
236 
237  if ((attrib = tag.getAttribute("morph"))) {
238  char splitChar = '|';
239  const int countSplit1 = tag.getAttributePartCount("morph", '|');
240  const int countSplit2 = tag.getAttributePartCount("morph", ' '); /// \todo not allowed, remove soon
241  int count = 0;
242 
243  if (countSplit1 > countSplit2) { //| split char
244  splitChar = '|';
245  count = countSplit1;
246  }
247  else {
248  splitChar = ' ';
249  count = countSplit2;
250  }
251 
252  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
253 
254  attrValue = "";
255 
256  do {
257  if (attrValue.length()) {
258  attrValue.append('|');
259  }
260 
261  attrib = tag.getAttribute("morph", i, splitChar);
262 
263  if (i < 0) {
264  i = 0; // to handle our -1 condition
265  }
266 
267  val = strchr(attrib, ':');
268 
269  if (val) { //the prefix gives the modulename
270  //check the prefix
271  if (!strncmp("robinson:", attrib, 9)) { //robinson
272  attrValue.append( "Robinson:" ); //work is not the same as Sword's module name
273  attrValue.append( val + 1 );
274  }
275  //strongs is handled by BibleTime
276  /*else if (!strncmp("strongs", attrib, val-atrrib)) {
277  attrValue.append( !strncmp(attrib, "x-", 2) ? attrib+2 : attrib );
278  }*/
279  else {
280  attrValue.append( !strncmp(attrib, "x-", 2) ? attrib + 2 : attrib );
281  }
282  }
283  else { //no prefix given
284  val = attrib;
285  const bool skipFirst = ((val[0] == 'T') && ((val[1] == 'H') || (val[1] == 'G')));
286  attrValue.append( skipFirst ? val + 1 : val );
287  }
288  }
289  while (++i < count);
290 
291  if (attrValue.length()) {
292  outTag.setAttribute("morph", attrValue.c_str());
293  }
294  }
295 
296  if ((attrib = tag.getAttribute("POS"))) {
297  val = strchr(attrib, ':');
298  val = (val) ? (val + 1) : attrib;
299  outTag.setAttribute("pos", val);
300  }
301 
302  buf.append( outTag.toString() );
303  }
304  else if (tag.isEndTag()) { // end or empty <w> tag
305  buf.append("</span>");
306  }
307  }
308  else if (tagName == "note"sv) {
309  if (!tag.isEndTag()) { //start tag
310  const sword::SWBuf type( tag.getAttribute("type") );
311 
312  if (type == "crossReference") { //note containing cross references
313  myUserData->inCrossrefNote = true;
314  myUserData->noteTypes.emplace_back(UserData::CrossReference);
315 
316  /*
317  * Do not count crossrefs as footnotes if they are displayed in the text. This will cause problems
318  * with footnote numbering when crossrefs are turned on/off.
319  * When accessing footnotes, crossrefs must be turned off in the filter so that they are not in the entry
320  * attributes of Sword.
321  *
322  * //myUserData->swordFootnote++; // cross refs count as notes, too
323  */
324 
325  buf.append("<span class=\"crossreference\">");
326  sword::SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
327  sword::SWBuf footnoteBody = myUserData->entryAttributes["Footnote"][footnoteNumber]["body"];
328  buf += myModule->renderText(footnoteBody);
329  }
330 
331  /* else if (type == "explanation") {
332  }
333  */
334  else if ((type == "strongsMarkup") || (type == "x-strongsMarkup")) {
335  /**
336  * leave strong's markup notes out, in the future we'll probably have
337  * different option filters to turn different note types on or off
338  */
339 
340  myUserData->suspendTextPassThru = true;
341  myUserData->noteTypes.emplace_back(UserData::StrongsMarkup);
342  }
343 
344  else {
345  // qWarning("found note in %s", myUserData->key->getShortText());
346  buf.append(" <span class=\"footnote\" note=\"");
347  buf.append(myModule->getName());
348  buf.append('/');
349  buf.append(myUserData->key->getShortText());
350  buf.append('/');
351  buf.append( QString::number(myUserData->swordFootnote++).toUtf8().constData() ); //inefficient
352 
353  const sword::SWBuf n = tag.getAttribute("n");
354 
355  buf.append("\">");
356  buf.append( (n.length() > 0) ? n.c_str() : "*" );
357  buf.append("</span> ");
358 
359  myUserData->noteTypes.emplace_back(UserData::Footnote);
360  myUserData->suspendTextPassThru = true;
361  }
362  }
363  else if (/* tag.isEndTag() && */ !myUserData->noteTypes.empty()) {
364  if (myUserData->noteTypes.back() == UserData::CrossReference) {
365  buf.append("</span> ");
366 // myUserData->suspendTextPassThru = false;
367  myUserData->inCrossrefNote = false;
368  }
369 
370  myUserData->noteTypes.pop_back();
371  myUserData->suspendTextPassThru = false;
372  }
373  }
374  else if (tagName == "reference"sv) {
375  if (!tag.isEndTag() && !tag.isEmpty()) {
376  renderReference(tag.getAttribute("osisRef"),
377  buf,
378  *myModule,
379  *myUserData);
380  }
381  else if (tag.isEndTag()) {
382  buf.append("</a>");
383  }
384  else { // empty reference marker
385  // -- what should we do? nothing for now.
386  }
387  }
388  else if (tagName == "title"sv) {
389  if (!tag.isEndTag() && !tag.isEmpty()) {
390  buf.append("<div class=\"sectiontitle\">");
391  }
392  else if (tag.isEndTag()) {
393  buf.append("</div>");
394  }
395  else { // empty title marker
396  // what to do? is this even valid?
397  buf.append("<br/>");
398  }
399  }
400  else if (tagName == "hi"sv) { // <hi> highlighted text
401  const sword::SWBuf type = tag.getAttribute("type");
402 
403  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
404  if (type == "bold") {
405  buf.append("<span class=\"bold\">");
406  }
407  else if (type == "illuminated") {
408  buf.append("<span class=\"illuminated\">");
409  }
410  else if (type == "italic") {
411  buf.append("<span class=\"italic\">");
412  }
413  else if (type == "line-through") {
414  buf.append("<span class=\"line-through\">");
415  }
416  else if (type == "normal") {
417  buf.append("<span class=\"normal\">");
418  }
419  else if (type == "small-caps") {
420  buf.append("<span class=\"small-caps\">");
421  }
422  else if (type == "underline") {
423  buf.append("<span class=\"underline\">");
424  }
425  else {
426  buf.append("<span>"); //don't break markup, </span> is inserted later
427  }
428  }
429  else if (tag.isEndTag()) { //all hi replacements are html spans
430  buf.append("</span>");
431  }
432  }
433  else if (tagName == "name"sv) {
434  const sword::SWBuf type = tag.getAttribute("type");
435 
436  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
437  if (type == "geographic") {
438  buf.append("<span class=\"name\"><span class=\"geographic\">");
439  }
440  else if (type == "holiday") {
441  buf.append("<span class=\"name\"><span class=\"holiday\">");
442  }
443  else if (type == "nonhuman") {
444  buf.append("<span class=\"name\"><span class=\"nonhuman\">");
445  }
446  else if (type == "person") {
447  buf.append("<span class=\"name\"><span class=\"person\">");
448  }
449  else if (type == "ritual") {
450  buf.append("<span class=\"name\"><span class=\"ritual\">");
451  }
452  else {
453  buf.append("<span class=\"name\"><span>");
454  }
455  }
456  else if (tag.isEndTag()) { //all hi replacements are html spans
457  buf.append("</span></span> ");
458  }
459  }
460  else if (tagName == "transChange"sv) {
461  sword::SWBuf type( tag.getAttribute("type") );
462 
463  if ( !type.length() ) {
464  type = tag.getAttribute("changeType");
465  }
466 
467  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
468  if (type == "added") {
469  buf.append("<span class=\"transchange\" title=\"");
470  buf.append(QObject::tr("Added text").toUtf8().constData());
471  buf.append("\"><span class=\"added\">");
472  }
473  else if (type == "amplified") {
474  buf.append("<span class=\"transchange\"><span class=\"amplified\">");
475  }
476  else if (type == "changed") {
477  buf.append("<span class=\"transchange\"><span class=\"changed\">");
478  }
479  else if (type == "deleted") {
480  buf.append("<span class=\"transchange\"><span class=\"deleted\">");
481  }
482  else if (type == "moved") {
483  buf.append("<span class=\"transchange\"><span class=\"moved\">");
484  }
485  else if (type == "tenseChange") {
486  buf.append("<span class=\"transchange\" title=\"");
487  buf.append(QObject::tr("Verb tense changed").toUtf8().constData());
488  buf.append("\"><span class=\"tenseChange\">");
489  }
490  else {
491  buf.append("<span class=\"transchange\"><span>");
492  }
493  }
494  else if (tag.isEndTag()) { //all hi replacements are html spans
495  buf.append("</span></span>");
496  }
497  }
498  else if (tagName == "p"sv) {
499  if (tag.isEndTag())
500  buf.append("</p>");
501  else
502  buf.append("<p>");
503 
504  }
505  else if (tagName == "q"sv) { // <q> quote
506  //sword::SWBuf type = tag.getAttribute("type");
507  sword::SWBuf who = tag.getAttribute("who");
508  const char *lev = tag.getAttribute("level");
509  int level = (lev) ? atoi(lev) : 1;
510  sword::SWBuf quoteMarker = tag.getAttribute("marker");
511 
512  if ((!tag.isEndTag())) {
513  if (!tag.isEmpty()) {
514  myUserData->quote.who = who;
515  }
516 
517  if (quoteMarker.size() > 0) {
518  buf.append(quoteMarker);
519  }
520  else if (osisQToTick) //alternate " and '
521  buf.append((level % 2) ? '\"' : '\'');
522 
523  if (who == "Jesus") {
524  buf.append("<span class=\"jesuswords\">");
525  }
526  }
527  else if (tag.isEndTag()) {
528  if (myUserData->quote.who == "Jesus") {
529  buf.append("</span>");
530  }
531  if (quoteMarker.size() > 0) {
532  buf.append(quoteMarker);
533  }
534  else if (osisQToTick) { //alternate " and '
535  buf.append((level % 2) ? '\"' : '\'');
536  }
537 
538  myUserData->quote.who = "";
539  }
540  }
541  else if (tagName == "abbr"sv) {
542  if (!tag.isEndTag() && !tag.isEmpty()) {
543  const sword::SWBuf expansion = tag.getAttribute("expansion");
544 
545  buf.append("<span class=\"abbreviation\" expansion=\"");
546  buf.append(expansion);
547  buf.append("\">");
548  }
549  else if (tag.isEndTag()) {
550  buf.append("</span>");
551  }
552  }
553  else if (tagName == "milestone"sv) {
554  const sword::SWBuf type = tag.getAttribute("type");
555 
556  if ((type == "screen") || (type == "line")) {//line break
557  buf.append("<br/>");
558  userData->supressAdjacentWhitespace = true;
559  }
560  else if (type == "x-p") { //e.g. occurs in the KJV2006 module
561  //buf.append("<br/>");
562  const sword::SWBuf marker = tag.getAttribute("marker");
563  if (marker.length() > 0) {
564  buf.append(marker);
565  }
566  }
567  }
568  else if (tagName == "seg"sv) {
569  if (!tag.isEndTag() && !tag.isEmpty()) {
570 
571  const sword::SWBuf type = tag.getAttribute("type");
572 
573  if (type == "morph") {//line break
574  //This code is for WLC and MORPH (WHI)
575  sword::XMLTag outTag("span");
576  outTag.setAttribute("class", "morphSegmentation");
577  const char* attrValue;
578  //Transfer the values to the span
579  //Problem: the data is in hebrew/aramaic, how to encode in HTML/BibleTime?
580  if ((attrValue = tag.getAttribute("lemma"))) outTag.setAttribute("lemma", attrValue);
581  if ((attrValue = tag.getAttribute("morph"))) outTag.setAttribute("morph", attrValue);
582  if ((attrValue = tag.getAttribute("homonym"))) outTag.setAttribute("homonym", attrValue);
583 
584  buf.append(outTag.toString());
585  //buf.append("<span class=\"morphSegmentation\">");
586  }
587  else {
588  buf.append("<span>");
589  }
590  }
591  else { // seg end tag
592  buf.append("</span>");
593  }
594  //qWarning(QString("handled <seg> token. result: %1").arg(buf.c_str()).latin1());
595  }
596  //divine name, don't use simple tag replacing because it may have attributes
597  else if (tagName == "divineName"sv) {
598  if (!tag.isEndTag()) {
599  buf.append("<span class=\"name\"><span class=\"divine\">");
600  }
601  else { //all hi replacements are html spans
602  buf.append("</span></span>");
603  }
604  }
605  else { //all tokens handled by OSISHTMLHref will run through the filter now
606  return sword::OSISHTMLHREF::handleToken(buf, token, userData);
607  }
608  }
609 
610  return false;
611 }
BtConfig & btConfig()
This is a shorthand for BtConfig::getInstance().
Definition: btconfig.h:305
CSwordModuleInfo * getDefaultSwordModuleByType(const QString &moduleType)
Returns default sword module info class for a given module type.
Definition: btconfig.cpp:503
CSwordModuleInfo * findFirstAvailableModule(CSwordModuleInfo::ModuleType type)
static CSwordBackend & instance() noexcept
Definition: cswordbackend.h:98
CSwordModuleInfo * findSwordModuleByPointer(const sword::SWModule *const swmodule) const
Searches for a module with the given sword module as module().
unsigned short int swordFootnote
Definition: osistohtml.h:52
sword::AttributeTypeList entryAttributes
Definition: osistohtml.h:54
std::vector< NoteType > noteTypes
Definition: osistohtml.h:62
struct Filters::OsisToHtml::UserData::@0 quote
bool handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) override
Definition: osistohtml.cpp:136
QString parseVerseReference(QString const &ref, ParseOptions const &options)
QString encodeHyperlink(CSwordModuleInfo const &module, QString const &key)
void renderReference(char const *const osisRef, sword::SWBuf &buf, sword::SWModule const &myModule, UserData const &myUserData)
Definition: osistohtml.cpp:35