BibleTime
bttextfilter.cpp
Go to the documentation of this file.
1 /*********
2 *
3 * In the name of the Father, and of the Son, and of the Holy Spirit.
4 *
5 * This file is part of BibleTime's source code, https://bibletime.info/
6 *
7 * Copyright 1999-2021 by the BibleTime developers.
8 * The BibleTime source code is licensed under the GNU General Public License
9 * version 2.0.
10 *
11 **********/
12 
13 #include "bttextfilter.h"
14 
15 #include <QDebug>
16 #include <QRegularExpression>
17 #include <QRegularExpressionMatch>
18 
19 
20 namespace {
21 
22 QStringList splitText(QString const & text) {
23  QStringList parts;
24  int from = 0;
25  while (from < text.length()) {
26 
27  // Get text before tag
28  int end = text.indexOf('<', from);
29  if (end == -1)
30  end = text.length();
31  parts.append(text.mid(from, end-from));
32  from = end;
33 
34  //Get tag text
35  end = text.indexOf('>', from);
36  if (end == -1)
37  end = text.length();
38  parts.append(text.mid(from, end-from+1));
39  from = end+1;
40  }
41  return parts;
42 }
43 
44 void fixDoubleBR(QStringList & parts) {
45  static QRegularExpression const rx(R"regex(<br\s*/>)regex");
46  for (int index = 2; index < parts.count(); ++index) {
47  if (parts.at(index).contains(rx) && parts.at(index-2).contains(rx))
48  parts[index] = "";
49  }
50 }
51 
52 // Typical input: <span class="footnote" note="ESV2011/Luke 11:37/1">
53 // Output: <span class="footnote" note="ESV2011/Luke 11:37/1">1</span>
54 
55 int rewriteFootnoteAsLink(QStringList & parts, int i, QString const & part) {
56  if (i + 2 >= parts.count())
57  return 1;
58 
59  static QRegularExpression const rx(R"regex(note="([^"]*))regex");
60  if (auto const match = rx.match(part); match.hasMatch()) {
61  auto const & footnoteText = parts.at(i + 1);
62  parts[i] =
63  QStringLiteral(
64  R"HTML(<a class="footnote" href="sword://footnote/%1=%2">)HTML")
65  .arg(match.captured(1)).arg(footnoteText);
66  parts[i+1] = QStringLiteral("(%1)").arg(footnoteText);
67  parts[i+2] = QStringLiteral("</a>");
68  return 3;
69  }
70  return 1;
71 }
72 
73 // Packs attribute part of href into the link
74 // Typical input: <a name="Luke11_29" href="sword://Bible/ESV2011/Luke 11:29">
75 // Output: <a href="sword://Bible/ESV2011/Luke 11:29||name=Luke11_29">
76 
77 void rewriteHref(QStringList & parts, int i, QString const & part) {
78  static QRegularExpression const rx(
79  R"regex(<a\s+(\w+)="([^"]*)"\s+(\w+)="([^"]*)")regex");
80  if (auto const match = rx.match(part); match.hasMatch())
81  parts[i] =
82  ((match.captured(1) == QStringLiteral("href"))
83  ? QStringLiteral(R"HTML(<a %1="%2||%3=%4" name="crossref">)HTML")
84  : QStringLiteral(R"HTML(<a %3="%4||%1=%2" name="crossref">)HTML"))
85  .arg(match.captured(1),
86  match.captured(2),
87  match.captured(3),
88  match.captured(4));
89 }
90 
91 // Typical input: <span lemma="H07225">God</span>
92 // Output: "<a href="sword://lemmamorph/lemma=H0430||/God" style="color: black">"
93 int rewriteLemmaOrMorphAsLink(QStringList & parts, int i, QString const & part)
94 {
95  if (i + 2 >= parts.count())
96  return 1;
97 
98  QString value;
99  {
100  static QRegularExpression const rx(R"regex(lemma="([^"]*)")regex");
101  if (auto const match = rx.match(part); match.hasMatch())
102  value = QStringLiteral("lemma=") + match.captured(1);
103  }{
104  static QRegularExpression const rx(R"regex(morph="([^"]*)")regex");
105  if (auto const match = rx.match(part); match.hasMatch()) {
106  if (value.isEmpty()) {
107  value = QStringLiteral("morph=") + match.captured(1);
108  } else {
109  value = QStringLiteral("%1||morph=%2")
110  .arg(value, match.captured(1));
111  }
112  }
113  }
114 
115  auto const & refText = parts.at(i + 1);
116  parts[i] =
117  QStringLiteral(
118  R"HTM(<a id="lemmamorph" href="sword://lemmamorph/%1/%2">)HTM")
119  .arg(value, refText);
120  parts[i + 2] = QStringLiteral("</a>");
121  return 3;
122 }
123 
124 int rewriteTag(QStringList & parts, int i, QString const & tag) {
125  if (i + 2 >= parts.count())
126  return 1;
127  parts[i] = "<" + tag + ">";
128  parts[i+2] ="</" + tag + ">";
129  return 3;
130 }
131 
132 int rewriteTitle(QStringList & parts, int i, QString const & tag) {
133  if (i + 2 >= parts.count())
134  return 1;
135  parts[i] = "<div><big><" + tag + ">";
136  parts[i+2] ="</" + tag + "></big></div>";
137  return 3;
138 }
139 
140 int rewriteClass(QStringList & parts, int i, QString const & part) {
141 
142  if (part.contains(QStringLiteral(R"HTML(class="footnote")HTML"))) {
143  return rewriteFootnoteAsLink(parts, i, part);
144  } else if (part.contains(QStringLiteral(R"HTML(class="bold")HTML"))) {
145  return rewriteTag(parts, i, "b");
146  } else if (part.contains(QStringLiteral(R"HTML(class="italic")HTML"))) {
147  return rewriteTag(parts, i, "i");
148  } else if (part.contains(QStringLiteral(R"HTML(class="chaptertitle")HTML"))) {
149  return rewriteTitle(parts, i, "b");
150  } else if (part.contains(QStringLiteral(R"HTML(class="sectiontitle")HTML"))) {
151  return rewriteTitle(parts, i, "b");
152  } else if (part.contains(QStringLiteral(R"HTML(class="booktitle")HTML"))) {
153  return rewriteTitle(parts, i, "b");
154  }
155  return 3;
156 }
157 
158 } // anonymous namespace
159 
160 BtTextFilter::BtTextFilter() = default;
161 
162 BtTextFilter::~BtTextFilter() = default;
163 
164 QString BtTextFilter::processText(const QString &text) {
165  if (text.isEmpty())
166  return text;
167  QString localText = text;
168  { // Fix !P tag which is not rich text:
169  int index = 0;
170  while ((index = localText.indexOf(QStringLiteral("<!P>"))) >= 0)
171  localText.remove(index,4);
172  }
173  auto parts = splitText(localText);
174  fixDoubleBR(parts);
175 
176  for (int i = 0; i < parts.count();) {
177  if (auto const & part = parts.at(i); part.startsWith('<')) { // is tag
178  if (part.contains(QStringLiteral(R"HTML(class=)HTML"))) {
179  i += rewriteClass(parts, i, part);
180  } else if (part.contains(QStringLiteral(R"HTML(class="footnote")HTML"))) {
181  i += rewriteFootnoteAsLink(parts, i, part);
182  } else if (part.contains(QStringLiteral(R"HTML(href=")HTML"))) {
183  rewriteHref(parts, i, part);
184  ++i;
185  } else if (part.contains(QStringLiteral(R"HTML(lemma=")HTML"))
186  || part.contains(QStringLiteral(R"HTML(morph=")HTML")))
187  {
188  i += rewriteLemmaOrMorphAsLink(parts, i, part);
189  } else {
190  ++i;
191  }
192  } else {
193  ++i;
194  }
195  }
196  return parts.join(QString());
197 }
QString processText(const QString &text) override
~BtTextFilter() override
QStringList splitText(QString const &text)
int rewriteClass(QStringList &parts, int i, QString const &part)
int rewriteLemmaOrMorphAsLink(QStringList &parts, int i, QString const &part)
int rewriteTitle(QStringList &parts, int i, QString const &tag)
void fixDoubleBR(QStringList &parts)
void rewriteHref(QStringList &parts, int i, QString const &part)
int rewriteFootnoteAsLink(QStringList &parts, int i, QString const &part)
int rewriteTag(QStringList &parts, int i, QString const &tag)