BibleTime
osistohtml.cpp
Go to the documentation of this file.
1/*********
2*
3* In the name of the Father, and of the Son, and of the Holy Spirit.
4*
5* This file is part of BibleTime's source code, https://bibletime.info/
6*
7* Copyright 1999-2025 by the BibleTime developers.
8* The BibleTime source code is licensed under the GNU General Public License
9* version 2.0.
10*
11**********/
12
13#include "osistohtml.h"
14
15#include <QString>
16#include <string_view>
17#include "../config/btconfig.h"
18#include "../drivers/cswordmoduleinfo.h"
19#include "../managers/cswordbackend.h"
20#include "../managers/referencemanager.h"
21
22// Sword includes:
23#pragma GCC diagnostic push
24#pragma GCC diagnostic ignored "-Wsuggest-override"
25#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
26#include <swbuf.h>
27#include <swmodule.h>
28#include <utilxml.h>
29#pragma GCC diagnostic pop
30
31
32namespace {
33
34template <typename UserData>
35void renderReference(char const * const osisRef,
36 sword::SWBuf & buf,
37 sword::SWModule const & myModule,
38 UserData const & myUserData)
39{
40 QString const ref(osisRef);
41 //BT_ASSERT(!ref.isEmpty()); checked later
42
43 if (!ref.isEmpty()) {
44 /* find out the mod, using the current module makes sense if it's a
45 bible or commentary because the refs link into a bible by default.
46 If the osisRef is something like "ModuleID:key comes here" then the
47 modulename is given, so we'll use that one. */
48
49 auto const * mod =
51 //BT_ASSERT(mod); checked later
52 if (!mod || (mod->type() != CSwordModuleInfo::Bible
53 && mod->type() != CSwordModuleInfo::Commentary))
54 {
56 QStringLiteral("standardBible"));
57 if (!mod)
60 }
61
62 // BT_ASSERT(mod); There's no necessarily a module or standard Bible
63
64 //if the osisRef like "GerLut:key" contains a module, use that
65 auto const pos = ref.indexOf(':');
66
67 QString hrefRef;
68 if ((pos >= 0)
69 && ref.at(pos - 1).isLetter()
70 && ref.at(pos + 1).isLetter())
71 {
72 auto const newModuleName(ref.left(pos));
73 hrefRef = ref.mid(pos + 1);
74
75 if (auto const * const moduleByName =
76 CSwordBackend::instance().findModuleByName(newModuleName))
77 mod = moduleByName;
78 } else {
79 hrefRef = ref;
80 }
81
82 if (mod) {
83 using namespace ReferenceManager;
84 ParseOptions const options{
85 mod->name(),
86 QString::fromUtf8(myUserData.key->getText()),
87 myModule.getLanguage()};
88
89 auto const hyperlink( // Hyperlink with key and mod
90 encodeHyperlink(
91 *mod,
92 parseVerseReference(hrefRef, options)).toUtf8());
93
94 // Ref must contain the osisRef module marker if there was any:
95 auto const moduleMarker(parseVerseReference(ref, options).toUtf8());
96
97 buf.append("<a href=\"")
98 .append(hyperlink.constData())
99 .append("\" crossrefs=\"")
100 .append(moduleMarker.constData())
101 .append("\">");
102 }
103 /** \todo Should we add something if there were no referenced module
104 available? */
105 }
106} // renderReference()
107
108} // anonymous namespace
109
111 setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes
112
113 addTokenSubstitute("inscription", "<span class=\"inscription\">");
114 addTokenSubstitute("/inscription", "</span>");
115
116 addTokenSubstitute("mentioned", "<span class=\"mentioned\">");
117 addTokenSubstitute("/mentioned", "</span>");
118
119// addTokenSubstitute("divineName", "<span class=\"name\"><span class=\"divine\">");
120// addTokenSubstitute("/divineName", "</span></span>");
121
122 /// \todo Move that down to the real tag handling, segs without the type morph would generate incorrect markup, as the end span is always inserted
123// addTokenSubstitute("seg type=\"morph\"", "<span class=\"morphSegmentation\">");
124// addTokenSubstitute("/seg", "</span>");
125
126 // OSIS tables
127 addTokenSubstitute("table", "<table>");
128 addTokenSubstitute("/table", "</table>");
129 addTokenSubstitute("row", "<tr>");
130 addTokenSubstitute("/row", "</tr>");
131 addTokenSubstitute("cell", "<td>");
132 addTokenSubstitute("/cell", "</td>");
133
134}
135
136bool Filters::OsisToHtml::handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) {
137 using namespace std::literals;
138 // manually process if it wasn't a simple substitution
139
140 if (!substituteToken(buf, token)) {
141 UserData* myUserData = static_cast<UserData*>(userData);
142 sword::SWModule* myModule = const_cast<sword::SWModule*>(myUserData->module); //hack
143
144 sword::XMLTag const tag(token);
145 // qWarning("found %s", token);
146 auto const osisQToTickEntry =
147 userData->module->getConfigEntry("OSISqToTick");
148 bool const osisQToTick =
149 !osisQToTickEntry || osisQToTickEntry != "false"sv;
150
151 std::string_view const tagName(tag.getName());
152
153 if (tagName == "div"sv) {
154 if (tag.isEndTag()) {
155 buf.append("</div>");
156 } else {
157 sword::SWBuf type( tag.getAttribute("type") );
158 if (type == "introduction") {
159 if (!tag.isEmpty())
160 buf.append("<div class=\"introduction\">");
161 } else if (type == "chapter") {
162 if (!tag.isEmpty())
163 buf.append("<div class=\"chapter\" ></div>"); //don't open a div here, that would lead to a broken XML structure
164 } else if (type == "x-p") {
165 buf.append("<br/>");
166 } else if (type == "paragraph") {
167 if (tag.getAttribute("sID"))
168 buf.append("<p>");
169 else if (tag.getAttribute("eID"))
170 buf.append("</p>");
171 } else {
172 buf.append("<div>");
173 }
174 }
175 }
176 else if (tagName == "w"sv) {
177 if ((!tag.isEmpty()) && (!tag.isEndTag())) { //start tag
178 const char *attrib;
179 const char *val;
180
181 sword::XMLTag outTag("span");
182 sword::SWBuf attrValue;
183
184 if ((attrib = tag.getAttribute("xlit"))) {
185 val = strchr(attrib, ':');
186 val = (val) ? (val + 1) : attrib;
187 outTag.setAttribute("xlit", val);
188 }
189
190 if ((attrib = tag.getAttribute("gloss"))) {
191 val = strchr(attrib, ':');
192 val = (val) ? (val + 1) : attrib;
193 outTag.setAttribute("gloss", val);
194 }
195
196 if ((attrib = tag.getAttribute("lemma"))) {
197 char splitChar = '|';
198 const int countSplit1 = tag.getAttributePartCount("lemma", '|');
199 const int countSplit2 = tag.getAttributePartCount("lemma", ' '); /// \todo not allowed, remove soon
200 int count = 0;
201
202 if (countSplit1 > countSplit2) { //| split char
203 splitChar = '|'; /// \todo not allowed, remove soon
204 count = countSplit1;
205 }
206 else {
207 splitChar = ' ';
208 count = countSplit2;
209 }
210
211 int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
212 attrValue = "";
213
214 do {
215 if (attrValue.length()) {
216 attrValue.append( '|' );
217 }
218
219 attrib = tag.getAttribute("lemma", i, splitChar);
220
221 if (i < 0) { // to handle our -1 condition
222 i = 0;
223 }
224
225 val = strchr(attrib, ':');
226 val = (val) ? (val + 1) : attrib;
227
228 attrValue.append(val);
229 }
230 while (++i < count);
231
232 if (attrValue.length()) {
233 outTag.setAttribute("lemma", attrValue.c_str());
234 }
235 }
236
237 if ((attrib = tag.getAttribute("morph"))) {
238 char splitChar = '|';
239 const int countSplit1 = tag.getAttributePartCount("morph", '|');
240 const int countSplit2 = tag.getAttributePartCount("morph", ' '); /// \todo not allowed, remove soon
241 int count = 0;
242
243 if (countSplit1 > countSplit2) { //| split char
244 splitChar = '|';
245 count = countSplit1;
246 }
247 else {
248 splitChar = ' ';
249 count = countSplit2;
250 }
251
252 int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
253
254 attrValue = "";
255
256 do {
257 if (attrValue.length()) {
258 attrValue.append('|');
259 }
260
261 attrib = tag.getAttribute("morph", i, splitChar);
262
263 if (i < 0) {
264 i = 0; // to handle our -1 condition
265 }
266
267 val = strchr(attrib, ':');
268
269 if (val) { //the prefix gives the modulename
270 //check the prefix
271 if (!strncmp("robinson:", attrib, 9)) { //robinson
272 attrValue.append( "Robinson:" ); //work is not the same as Sword's module name
273 attrValue.append( val + 1 );
274 }
275 //strongs is handled by BibleTime
276 /*else if (!strncmp("strongs", attrib, val-atrrib)) {
277 attrValue.append( !strncmp(attrib, "x-", 2) ? attrib+2 : attrib );
278 }*/
279 else {
280 attrValue.append( !strncmp(attrib, "x-", 2) ? attrib + 2 : attrib );
281 }
282 }
283 else { //no prefix given
284 val = attrib;
285 const bool skipFirst = ((val[0] == 'T') && ((val[1] == 'H') || (val[1] == 'G')));
286 attrValue.append( skipFirst ? val + 1 : val );
287 }
288 }
289 while (++i < count);
290
291 if (attrValue.length()) {
292 outTag.setAttribute("morph", attrValue.c_str());
293 }
294 }
295
296 if ((attrib = tag.getAttribute("POS"))) {
297 val = strchr(attrib, ':');
298 val = (val) ? (val + 1) : attrib;
299 outTag.setAttribute("pos", val);
300 }
301
302 buf.append( outTag.toString() );
303 }
304 else if (tag.isEndTag()) { // end or empty <w> tag
305 buf.append("</span>");
306 }
307 }
308 else if (tagName == "note"sv) {
309 if (!tag.isEndTag()) { //start tag
310 const sword::SWBuf type( tag.getAttribute("type") );
311
312 if (type == "crossReference") { //note containing cross references
313 myUserData->inCrossrefNote = true;
314 myUserData->noteTypes.emplace_back(UserData::CrossReference);
315
316 /*
317 * Do not count crossrefs as footnotes if they are displayed in the text. This will cause problems
318 * with footnote numbering when crossrefs are turned on/off.
319 * When accessing footnotes, crossrefs must be turned off in the filter so that they are not in the entry
320 * attributes of Sword.
321 *
322 * //myUserData->swordFootnote++; // cross refs count as notes, too
323 */
324
325 buf.append("<span class=\"crossreference\">");
326 sword::SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
327 sword::SWBuf footnoteBody = myUserData->entryAttributes["Footnote"][footnoteNumber]["body"];
328 buf += myModule->renderText(footnoteBody);
329 }
330
331 /* else if (type == "explanation") {
332 }
333 */
334 else if ((type == "strongsMarkup") || (type == "x-strongsMarkup")) {
335 /**
336 * leave strong's markup notes out, in the future we'll probably have
337 * different option filters to turn different note types on or off
338 */
339
340 myUserData->suspendTextPassThru = true;
341 myUserData->noteTypes.emplace_back(UserData::StrongsMarkup);
342 }
343
344 else {
345 // qWarning("found note in %s", myUserData->key->getShortText());
346 buf.append(" <span class=\"footnote\" note=\"");
347 buf.append(myModule->getName());
348 buf.append('/');
349 buf.append(myUserData->key->getShortText());
350 buf.append('/');
351 buf.append( QString::number(myUserData->swordFootnote++).toUtf8().constData() ); //inefficient
352
353 const sword::SWBuf n = tag.getAttribute("n");
354
355 buf.append("\">");
356 buf.append( (n.length() > 0) ? n.c_str() : "*" );
357 buf.append("</span> ");
358
359 myUserData->noteTypes.emplace_back(UserData::Footnote);
360 myUserData->suspendTextPassThru = true;
361 }
362 }
363 else if (/* tag.isEndTag() && */ !myUserData->noteTypes.empty()) {
364 if (myUserData->noteTypes.back() == UserData::CrossReference) {
365 buf.append("</span> ");
366// myUserData->suspendTextPassThru = false;
367 myUserData->inCrossrefNote = false;
368 }
369
370 myUserData->noteTypes.pop_back();
371 myUserData->suspendTextPassThru = false;
372 }
373 }
374 else if (tagName == "reference"sv) {
375 if (!tag.isEndTag() && !tag.isEmpty()) {
376 renderReference(tag.getAttribute("osisRef"),
377 buf,
378 *myModule,
379 *myUserData);
380 }
381 else if (tag.isEndTag()) {
382 buf.append("</a>");
383 }
384 else { // empty reference marker
385 // -- what should we do? nothing for now.
386 }
387 }
388 else if (tagName == "title"sv) {
389 if (!tag.isEndTag() && !tag.isEmpty()) {
390 buf.append("<div class=\"sectiontitle\">");
391 }
392 else if (tag.isEndTag()) {
393 buf.append("</div>");
394 }
395 else { // empty title marker
396 // what to do? is this even valid?
397 buf.append("<br/>");
398 }
399 }
400 else if (tagName == "hi"sv) { // <hi> highlighted text
401 const sword::SWBuf type = tag.getAttribute("type");
402
403 if ((!tag.isEndTag()) && (!tag.isEmpty())) {
404 if (type == "bold") {
405 buf.append("<span class=\"bold\">");
406 }
407 else if (type == "illuminated") {
408 buf.append("<span class=\"illuminated\">");
409 }
410 else if (type == "italic") {
411 buf.append("<span class=\"italic\">");
412 }
413 else if (type == "line-through") {
414 buf.append("<span class=\"line-through\">");
415 }
416 else if (type == "normal") {
417 buf.append("<span class=\"normal\">");
418 }
419 else if (type == "small-caps") {
420 buf.append("<span class=\"small-caps\">");
421 }
422 else if (type == "underline") {
423 buf.append("<span class=\"underline\">");
424 }
425 else {
426 buf.append("<span>"); //don't break markup, </span> is inserted later
427 }
428 }
429 else if (tag.isEndTag()) { //all hi replacements are html spans
430 buf.append("</span>");
431 }
432 }
433 else if (tagName == "name"sv) {
434 const sword::SWBuf type = tag.getAttribute("type");
435
436 if ((!tag.isEndTag()) && (!tag.isEmpty())) {
437 if (type == "geographic") {
438 buf.append("<span class=\"name\"><span class=\"geographic\">");
439 }
440 else if (type == "holiday") {
441 buf.append("<span class=\"name\"><span class=\"holiday\">");
442 }
443 else if (type == "nonhuman") {
444 buf.append("<span class=\"name\"><span class=\"nonhuman\">");
445 }
446 else if (type == "person") {
447 buf.append("<span class=\"name\"><span class=\"person\">");
448 }
449 else if (type == "ritual") {
450 buf.append("<span class=\"name\"><span class=\"ritual\">");
451 }
452 else {
453 buf.append("<span class=\"name\"><span>");
454 }
455 }
456 else if (tag.isEndTag()) { //all hi replacements are html spans
457 buf.append("</span></span> ");
458 }
459 }
460 else if (tagName == "transChange"sv) {
461 sword::SWBuf type( tag.getAttribute("type") );
462
463 if ( !type.length() ) {
464 type = tag.getAttribute("changeType");
465 }
466
467 if ((!tag.isEndTag()) && (!tag.isEmpty())) {
468 if (type == "added") {
469 buf.append("<span class=\"transchange\" title=\"");
470 buf.append(QObject::tr("Added text").toUtf8().constData());
471 buf.append("\"><span class=\"added\">");
472 }
473 else if (type == "amplified") {
474 buf.append("<span class=\"transchange\"><span class=\"amplified\">");
475 }
476 else if (type == "changed") {
477 buf.append("<span class=\"transchange\"><span class=\"changed\">");
478 }
479 else if (type == "deleted") {
480 buf.append("<span class=\"transchange\"><span class=\"deleted\">");
481 }
482 else if (type == "moved") {
483 buf.append("<span class=\"transchange\"><span class=\"moved\">");
484 }
485 else if (type == "tenseChange") {
486 buf.append("<span class=\"transchange\" title=\"");
487 buf.append(QObject::tr("Verb tense changed").toUtf8().constData());
488 buf.append("\"><span class=\"tenseChange\">");
489 }
490 else {
491 buf.append("<span class=\"transchange\"><span>");
492 }
493 }
494 else if (tag.isEndTag()) { //all hi replacements are html spans
495 buf.append("</span></span>");
496 }
497 }
498 else if (tagName == "p"sv) {
499 if (tag.isEndTag())
500 buf.append("</p>");
501 else
502 buf.append("<p>");
503
504 }
505 else if (tagName == "q"sv) { // <q> quote
506 //sword::SWBuf type = tag.getAttribute("type");
507 sword::SWBuf who = tag.getAttribute("who");
508 const char *lev = tag.getAttribute("level");
509 int level = (lev) ? atoi(lev) : 1;
510 sword::SWBuf quoteMarker = tag.getAttribute("marker");
511
512 if ((!tag.isEndTag())) {
513 if (!tag.isEmpty()) {
514 myUserData->quote.who = who;
515 }
516
517 if (quoteMarker.size() > 0) {
518 buf.append(quoteMarker);
519 }
520 else if (osisQToTick) //alternate " and '
521 buf.append((level % 2) ? '\"' : '\'');
522
523 if (who == "Jesus") {
524 buf.append("<span class=\"jesuswords\">");
525 }
526 }
527 else if (tag.isEndTag()) {
528 if (myUserData->quote.who == "Jesus") {
529 buf.append("</span>");
530 }
531 if (quoteMarker.size() > 0) {
532 buf.append(quoteMarker);
533 }
534 else if (osisQToTick) { //alternate " and '
535 buf.append((level % 2) ? '\"' : '\'');
536 }
537
538 myUserData->quote.who = "";
539 }
540 }
541 else if (tagName == "abbr"sv) {
542 if (!tag.isEndTag() && !tag.isEmpty()) {
543 const sword::SWBuf expansion = tag.getAttribute("expansion");
544
545 buf.append("<span class=\"abbreviation\" expansion=\"");
546 buf.append(expansion);
547 buf.append("\">");
548 }
549 else if (tag.isEndTag()) {
550 buf.append("</span>");
551 }
552 }
553 else if (tagName == "milestone"sv) {
554 const sword::SWBuf type = tag.getAttribute("type");
555
556 if ((type == "screen") || (type == "line")) {//line break
557 buf.append("<br/>");
558 userData->supressAdjacentWhitespace = true;
559 }
560 else if (type == "x-p") { //e.g. occurs in the KJV2006 module
561 //buf.append("<br/>");
562 const sword::SWBuf marker = tag.getAttribute("marker");
563 if (marker.length() > 0) {
564 buf.append(marker);
565 }
566 }
567 }
568 else if (tagName == "seg"sv) {
569 if (!tag.isEndTag() && !tag.isEmpty()) {
570
571 const sword::SWBuf type = tag.getAttribute("type");
572
573 if (type == "morph") {//line break
574 //This code is for WLC and MORPH (WHI)
575 sword::XMLTag outTag("span");
576 outTag.setAttribute("class", "morphSegmentation");
577 const char* attrValue;
578 //Transfer the values to the span
579 //Problem: the data is in hebrew/aramaic, how to encode in HTML/BibleTime?
580 if ((attrValue = tag.getAttribute("lemma"))) outTag.setAttribute("lemma", attrValue);
581 if ((attrValue = tag.getAttribute("morph"))) outTag.setAttribute("morph", attrValue);
582 if ((attrValue = tag.getAttribute("homonym"))) outTag.setAttribute("homonym", attrValue);
583
584 buf.append(outTag.toString());
585 //buf.append("<span class=\"morphSegmentation\">");
586 }
587 else {
588 buf.append("<span>");
589 }
590 }
591 else { // seg end tag
592 buf.append("</span>");
593 }
594 //qWarning(QString("handled <seg> token. result: %1").arg(buf.c_str()).latin1());
595 }
596 //divine name, don't use simple tag replacing because it may have attributes
597 else if (tagName == "divineName"sv) {
598 if (!tag.isEndTag()) {
599 buf.append("<span class=\"name\"><span class=\"divine\">");
600 }
601 else { //all hi replacements are html spans
602 buf.append("</span></span>");
603 }
604 }
605 else { //all tokens handled by OSISHTMLHref will run through the filter now
606 return sword::OSISHTMLHREF::handleToken(buf, token, userData);
607 }
608 }
609
610 return false;
611}
BtConfig & btConfig()
This is a shorthand for BtConfig::getInstance().
Definition btconfig.h:305
CSwordModuleInfo * getDefaultSwordModuleByType(const QString &moduleType)
Returns default sword module info class for a given module type.
Definition btconfig.cpp:494
CSwordModuleInfo * findFirstAvailableModule(CSwordModuleInfo::ModuleType type)
static CSwordBackend & instance() noexcept
CSwordModuleInfo * findSwordModuleByPointer(const sword::SWModule *const swmodule) const
Searches for a module with the given sword module as module().
unsigned short int swordFootnote
Definition osistohtml.h:52
sword::AttributeTypeList entryAttributes
Definition osistohtml.h:54
std::vector< NoteType > noteTypes
Definition osistohtml.h:62
struct Filters::OsisToHtml::UserData::@0 quote
bool handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) override
void renderReference(char const *const osisRef, sword::SWBuf &buf, sword::SWModule const &myModule, UserData const &myUserData)