BibleTime
thmltohtml.cpp
Go to the documentation of this file.
1/*********
2*
3* In the name of the Father, and of the Son, and of the Holy Spirit.
4*
5* This file is part of BibleTime's source code, https://bibletime.info/
6*
7* Copyright 1999-2025 by the BibleTime developers.
8* The BibleTime source code is licensed under the GNU General Public License
9* version 2.0.
10*
11**********/
12
13#include "thmltohtml.h"
14
15#include <QRegularExpression>
16#include <QRegularExpressionMatch>
17#include <QUrl>
18#include <utility>
19#include "../../util/btassert.h"
20#include "../config/btconfig.h"
21#include "../drivers/cswordmoduleinfo.h"
22#include "../managers/cswordbackend.h"
23#include "../managers/referencemanager.h"
24
25// Sword includes:
26#pragma GCC diagnostic push
27#pragma GCC diagnostic ignored "-Wextra-semi"
28#pragma GCC diagnostic ignored "-Wsuggest-override"
29#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
30#ifdef __clang__
31#pragma clang diagnostic push
32#pragma clang diagnostic ignored "-Wsuggest-destructor-override"
33#endif
34#include <swmodule.h>
35#include <utilstr.h>
36#include <utilxml.h>
37#include <versekey.h>
38#ifdef __clang__
39#pragma clang diagnostic pop
40#endif
41#pragma GCC diagnostic pop
42
43
44namespace Filters {
45
// NOTE(review): the line that opens this definition (listing line 46) is
// missing from this extract. Presumably this is the body of the ThmlToHtml
// constructor, which configures the filter settings - confirm against the
// real source file.
47 setEscapeStringCaseSensitive(true);
48 setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes
49
// Tokens are delimited by angle brackets and matched case-sensitively:
50 setTokenStart("<");
51 setTokenEnd(">");
52 setTokenCaseSensitive(true);
53
// Closes the span which handleToken() opens for a <foreign> start tag:
54 addTokenSubstitute("/foreign", "</span>");
55
// handleToken() has its own <note> branch; presumably these substitutes are
// removed so that note tokens reach it - TODO confirm.
56 removeTokenSubstitute("note");
57 removeTokenSubstitute("/note");
58}
59
/**
  Runs sword::ThMLHTML::processText() on the buffer and then rewrites the
  remaining <sync> tags (Strongs and morphology codes) into <span> markup
  carrying lemma and morph attributes.

  The text is first split into parts, each part ending right after a run of
  consecutive <sync> tags. Within each part the first <sync> tag is replaced
  by a closing span tag and a matching opening span tag is inserted further
  to the left. The values of any further <sync> tags in the same part are
  merged into that opening tag, separated by a pipe character when the
  attribute already exists.

  \param[in,out] buf The text to filter. It is overwritten with the rewritten
                     text once at least one <sync> run was found.
  \param[in] key Forwarded unchanged to the base class filter.
  \param[in] module Forwarded to the base class filter. Its name is also used
                    to look the module up in CSwordBackend.
  \returns 1 on every path.

  NOTE(review): module is dereferenced unconditionally below, although the
  declaration shown in the cross-reference index defaults it to nullptr.

  NOTE(review): listing line 70 (the rest of the module capability check) is
  missing from this extract. The index mentions
  CSwordModuleInfo::strongNumbers, so the condition presumably also tests
  that option - confirm against the real source file.
*/
60char ThmlToHtml::processText(sword::SWBuf &buf, const sword::SWKey *key,
61 const sword::SWModule *module)
62{
63 sword::ThMLHTML::processText(buf, key, module);
64
65 if (auto * const m =
66 CSwordBackend::instance().findModuleByName(module->getName()))
67 {
68 // only parse if the module has strongs or lemmas:
69 if (!m->has(CSwordModuleInfo::lemmas)
71 return 1;
72 }
73
// Step 1: cut the text into parts which each end after a run of <sync> tags.
74 QStringList list;
75 {
76 auto t = QString::fromUtf8(buf.c_str());
77 {
78 static QRegularExpression const tag(
79 QStringLiteral(R"PCRE(([.,;]?<sync[^>]+(type|value)=)PCRE"
80 R"PCRE("([^"]+)"[^>]+(type|value)=)PCRE"
81 R"PCRE("([^"]+)"([^<]*)>)+)PCRE"));
82 QRegularExpressionMatch match;
83 auto pos = t.indexOf(tag, 0, &match);
84 if (pos == -1) //no strong or morph code found in this text
85 return 1; //WARNING: Return already here
86 do {
87 auto const partLength = pos + match.capturedLength();
88 list.append(t.left(partLength));
89 t.remove(0, partLength);
90 pos = t.indexOf(tag, 0, &match);
91 } while (pos != -1);
92 }
93
94 // Append the trailing text to the list:
95 if (!t.isEmpty())
96 list.append(std::move(t));
97 }
98
// Step 2: rewrite the <sync> tags of every part. This expression matches one
// single tag with two mandatory and one optional attribute/value pair.
99 static QRegularExpression const tag(
100 QStringLiteral(R"PCRE(<sync[^>]+(type|value|class)="([^"]+)"[^>]+)PCRE"
101 R"PCRE((type|value|class)="([^"]+)"[^>]+)PCRE"
102 R"PCRE(((type|value|class)="([^"]+)")*([^<]*)>)PCRE"));
103 QString result;
104 for (auto & e : list) {
105
106 // pass text ahead of <sync> straight through
107 if (auto const pos = e.indexOf(tag); pos > 0) {
108 result.append(e.left(pos));
109 e.remove(0, pos);
110 }
111
112 // parse <sync> and change to <span>
113 bool hasLemmaAttr = false;
114 bool hasMorphAttr = false;
115
116 QRegularExpressionMatch match;
117 auto pos = e.indexOf(tag, 0, &match);
118 bool insertedTag = false;
119
120 while (pos != -1) {
121 bool isMorph = false;
122 bool isStrongs = false;
123 QString value;
124 QString valueClass;
125
126 // check 3 attribute/value pairs
127
// The pairs are capture groups (1,2) and (3,4). Group 5 wraps the optional
// third pair, so i is bumped once more to read that pair from groups (6,7).
128 for (int i = 1; i < 6; i += 2) {
129 if (i > 4)
130 i++;
131
132 auto const attrName = match.captured(i);
133 auto const attrValue = match.captured(i + 1);
134 if (attrName == QStringLiteral("type")) {
135 isMorph = (attrValue == QStringLiteral("morph"));
136 isStrongs = (attrValue == QStringLiteral("Strongs"));
137 } else if (attrName == QStringLiteral("value")) {
138 value = attrValue;
139 } else if (attrName == QStringLiteral("class")) {
140 valueClass = attrValue;
141 } else { // optional 3rd attribute pair is not present:
142 BT_ASSERT(attrName.isEmpty());
143 }
144 }
145
146 // prepend the class qualifier to the value
147 if (!valueClass.isEmpty())
148 value = QStringLiteral("%1:%2").arg(valueClass, value);
149
// Without a value there is nothing to emit; stop processing this part.
150 if (value.isEmpty()) {
151 break;
152 }
153
154 //insert the span
155 if (!insertedTag) {
// The first tag becomes the closing span tag; 7 is the length of that tag.
156 e.replace(pos, match.capturedLength(), QStringLiteral("</span>"));
157 pos += 7;
158
// NOTE(review): the attribute name is always lemma here, even when the first
// tag is of type morph - confirm that this is intended.
159 auto rep = QStringLiteral("<span lemma=\"%1\">").arg(value);
160 int startPos = 0;
161 QChar c = e[startPos];
162
// Skip leading whitespace and punctuation so the span opens after them:
163 while ((startPos < pos) && (c.isSpace() || c.isPunct())) {
164 ++startPos;
165 c = e[startPos];
166 }
167
168 hasLemmaAttr = isStrongs;
169 hasMorphAttr = isMorph;
170
// Keep pos pointing behind the closing span tag after the insertion:
171 pos += rep.length();
172 e.insert(startPos, std::move(rep));
173 }
174 else { //add the attribute to the existing tag
175 e.remove(pos, match.capturedLength());
176
177 if ((!isMorph && hasLemmaAttr) || (isMorph && hasMorphAttr)) { //we append another attribute value, e.g. 3000 gets 3000|5000
178 //search the existing attribute start
179 auto const & attrRegExp =
180 [isMorph]{
181 if (isMorph) {
182 static QRegularExpression const re(
183 QStringLiteral("morph=\".+?(?=\")"));
184 return re;
185 } else {
186 static QRegularExpression const re(
187 QStringLiteral("lemma=\".+?(?=\")"));
188 return re;
189 }
190 }();
191 QRegularExpressionMatch match;
192 const int foundAttrPos = e.indexOf(attrRegExp, pos, &match);
193
194 if (foundAttrPos != -1) {
195 e.insert(foundAttrPos + match.capturedLength(),
196 QStringLiteral("|%1").arg(value));
197 pos += value.length() + 1;
198
199 hasLemmaAttr = !isMorph;
200 hasMorphAttr = isMorph;
201 }
202 }
203 else { //attribute was not yet inserted
204 static QRegularExpression const re(
205 QStringLiteral("morph=|lemma="));
206 const int attrPos = e.indexOf(re, 0);
207
208 if (attrPos >= 0) {
209 hasMorphAttr = isMorph;
210 hasLemmaAttr = !isMorph;
211
212 auto attr = QStringLiteral("%1=\"%2\" ")
213 .arg(isMorph
214 ? QStringLiteral("morph")
215 : QStringLiteral("lemma"),
216 value);
217 pos += attr.length();
218 e.insert(attrPos, std::move(attr)); /// \bug e.replace() instead?
219 }
220 }
221 }
222
223 insertedTag = true;
224 pos = e.indexOf(tag, pos, &match);
225 }
226
227 result.append(std::move(e));
228 }
229
230 if (!list.isEmpty())
231 buf = result.toUtf8();
232
233 return 1;
234}
235
236
/**
  Converts one ThML token into HTML and appends the result to the buffer.

  Tokens covered by a registered token substitute or escape string
  substitute are handled by those calls. For all other tokens the tag name
  selects the handling: foreign, sync, note, scripRef, div and img (with a
  src attribute) are handled here. Everything else, including tokens without
  a tag name, is forwarded to sword::ThMLHTML::handleToken().

  \param[in,out] buf The output buffer to append to.
  \param[in] token The token to handle. The sync branch wraps it in angle
                   brackets again when passing it through.
  \param[in,out] userData Must point to a UserData instance (asserted). Its
                          footnote counter, pass-through flags and cached
                          absolute path are updated here.
  \returns true when the token was handled here, otherwise the return value
           of sword::ThMLHTML::handleToken().

  NOTE(review): this extract lacks listing lines 299, 318, 328, 349, 352,
  357, 360 and 368 inside the scripRef branch. The cross-reference index
  names ReferenceManager parseVerseReference and encodeHyperlink as well as
  CSwordBackend findFirstAvailableModule, so the missing lines presumably
  contain those calls - confirm against the real source file.
*/
237bool ThmlToHtml::handleToken(sword::SWBuf &buf, const char *token,
238 sword::BasicFilterUserData *userData)
239{
240 if (!substituteToken(buf, token) && !substituteEscapeString(buf, token)) {
241 sword::XMLTag const tag(token);
242 BT_ASSERT(dynamic_cast<UserData *>(userData));
243 UserData * const myUserData = static_cast<UserData *>(userData);
244 // Hack to be able to call stuff like Lang():
245 sword::SWModule const * const myModule =
246 const_cast<sword::SWModule *>(myUserData->module);
247 char const * const tagName = tag.getName();
248 if (!tagName) // unknown tag, pass through:
249 return sword::ThMLHTML::handleToken(buf, token, userData);
250 if (!sword::stricmp(tagName, "foreign")) {
251 // A text part in another language, we have to set the right font
252
253 if (const char * const tagLang = tag.getAttribute("lang"))
254 buf.append("<span class=\"foreign\" lang=\"")
255 .append(tagLang)
256 .append("\">");
257 } else if (!sword::stricmp(tagName, "sync")) {
258 // If Morph or Strong or Lemma:
// Such tags are passed through verbatim; processText() has a later pass
// which looks for <sync> tags in the filtered text.
259 if (const char * const tagType = tag.getAttribute("type"))
260 if (!sword::stricmp(tagType, "morph")
261 || !sword::stricmp(tagType, "Strongs")
262 || !sword::stricmp(tagType, "lemma"))
263 buf.append('<').append(token).append('>');
264 } else if (!sword::stricmp(tagName, "note")) { // <note> tag
265 if (!tag.isEmpty()) {
266 if (!tag.isEndTag()) {
// Emit a footnote marker whose note attribute is built from the module name,
// the short key text and the running footnote number:
267 buf.append(" <span class=\"footnote\" note=\"")
268 .append(myModule->getName())
269 .append('/')
270 .append(myUserData->key->getShortText())
271 .append('/')
272 .append(QString::number(myUserData->swordFootnote).toUtf8().constData())
273 .append("\">*</span> ");
274
275 myUserData->swordFootnote++;
276 myUserData->suspendTextPassThru = true;
277 myUserData->inFootnoteTag = true;
278 } else if (tag.isEndTag()) { // end tag
279 // buf += ")</span>";
280 myUserData->suspendTextPassThru = false;
281 myUserData->inFootnoteTag = false;
282 }
283 }
284 } else if (!sword::stricmp(tagName, "scripRef")) { // a scripRef
285 // scrip refs which are embedded in footnotes may not be displayed!
286
287 if (!myUserData->inFootnoteTag) {
288 if (tag.isEndTag()) {
289 if (myUserData->inscriptRef) { // like "<scripRef passage="John 3:16">See John 3:16</scripRef>"
290 buf.append("</a></span>");
291
292 myUserData->inscriptRef = false;
293 myUserData->suspendTextPassThru = false;
294 } else { // like "<scripRef>John 3:16</scripRef>"
295 if (CSwordModuleInfo const * const mod =
296 btConfig().getDefaultSwordModuleByType(
297 "standardBible"))
298 {
300 mod->name(),
301 // current module key:
302 QString::fromUtf8(myUserData->key->getText()),
303 myModule->getLanguage()};
304
305 //it's ok to split the reference, because no descriptive text is given
306 bool insertSemicolon = false;
307 buf.append("<span class=\"crossreference\">");
308 QStringList const refs(
309 QString::fromUtf8(
310 myUserData->lastTextNode.c_str()).split(
311 ';'));
312 QString oldRef; // the previous reference to use as a base for the next refs
313 for (auto const & ref : refs) {
314 if (!oldRef.isEmpty())
315 options.refBase = oldRef; // Use the last ref as a base, e.g. Rom 1,2-3, when the next ref is only 3:3-10
316
317 // Use the parsed result as the base for the next ref:
319 ref,
320 options);
321
322 // Prepend a ref divider if we're after the first one
323 if (insertSemicolon)
324 buf.append("; ");
325
326 buf.append("<a href=\"")
327 .append(
329 *mod,
330 oldRef
331 ).toUtf8().constData()
332 )
333 .append("\" crossrefs=\"")
334 .append(oldRef.toUtf8().constData())
335 .append("\">")
336 .append(ref.toUtf8().constData())
337 .append("</a>");
338 insertSemicolon = true;
339 }
340 buf.append("</span>"); //crossref end
341 }
342 myUserData->suspendTextPassThru = false;
343 }
344 } else if (tag.getAttribute("passage") ) {
345 // The passage was given as a parameter value
346 myUserData->inscriptRef = true;
347 myUserData->suspendTextPassThru = false;
348
350 QStringLiteral("standardBible"));
351 if (! mod)
353
354 if (mod) {
355 BT_ASSERT(tag.getAttribute("passage"));
356 QString const completeRef(
358 QString::fromUtf8(
359 tag.getAttribute("passage")),
361 mod->name(),
362 QString::fromUtf8(
363 myUserData->key->getText()),
364 myModule->getLanguage()}));
365 buf.append("<span class=\"crossreference\">")
366 .append("<a href=\"")
367 .append(
369 *mod,
370 completeRef
371 ).toUtf8().constData()
372 )
373 .append("\" crossrefs=\"")
374 .append(completeRef.toUtf8().constData())
375 .append("\">");
376 } else {
// No module available: still open a span and an anchor so that the closing
// markup appended by the end tag branch stays balanced.
377 buf.append("<span><a>");
378 }
379 } else { // We're starting a scripRef like "<scripRef>John 3:16</scripRef>"
380 myUserData->inscriptRef = false;
381 /* Let's stop text from going to output, the text gets
382 added in the closing-tag handler: */
383 myUserData->suspendTextPassThru = true;
384 }
385 }
386 } else if (!sword::stricmp(tagName, "div")) {
387 if (tag.isEndTag()) {
388 buf.append("</div>");
389 } else if (char const * const tagClass = tag.getAttribute("class")){
390 if (!sword::stricmp(tagClass, "sechead") ) {
391 buf.append("<div class=\"sectiontitle\">");
392 } else if (!sword::stricmp(tagClass, "title")) {
393 buf.append("<div class=\"booktitle\">");
394 }
395 }
396 } else if (!sword::stricmp(tagName, "img") && tag.getAttribute("src")) {
397 const char * value = tag.getAttribute("src");
398
399 if (value[0] == '/')
400 value++; //strip the first /
401
// Read the AbsoluteDataPath config entry only once and cache it in the user
// data for later img tokens:
402 if (!myUserData->absolutePath.has_value()) {
403 auto const * const absoluteDataPath =
404 myUserData->module->getConfigEntry("AbsoluteDataPath");
405 myUserData->absolutePath.emplace(
406 myUserData->module->isUnicode()
407 ? QString::fromUtf8(absoluteDataPath)
408 : QString::fromLatin1(absoluteDataPath));
409 }
410
411 buf.append("<img src=\"")
412 .append(
413 QUrl::fromLocalFile(
414 QStringLiteral("%1/%2").arg(
415 *myUserData->absolutePath,
416 QString::fromUtf8(value))
417 ).toString().toUtf8().constData())
418 .append("\" />");
419 } else { // Let unknown token pass thru:
420 return sword::ThMLHTML::handleToken(buf, token, userData);
421 }
422 }
423 return true;
424}
425
426} // namespace Filters
#define BT_ASSERT(...)
Definition btassert.h:17
BtConfig & btConfig()
This is a shorthand for BtConfig::getInstance().
Definition btconfig.h:305
CSwordModuleInfo * getDefaultSwordModuleByType(const QString &moduleType)
Returns default sword module info class for a given module type.
Definition btconfig.cpp:494
CSwordModuleInfo * findFirstAvailableModule(CSwordModuleInfo::ModuleType type)
static CSwordBackend & instance() noexcept
static FilterOption const strongNumbers
static FilterOption const lemmas
bool handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) override
char processText(sword::SWBuf &buf, const sword::SWKey *key, const sword::SWModule *module=nullptr) override
QString parseVerseReference(QString const &ref, ParseOptions const &options)
QString encodeHyperlink(CSwordModuleInfo const &module, QString const &key)