1919 */
2020
2121#include " zimcreatorfs.h"
22- #include " ../tools.h"
23- #include " tools.h"
2422
25- #include < fstream>
2623#include < dirent.h>
24+ #include < limits.h>
2725#include < sys/stat.h>
28- #include < regex>
2926#include < unistd.h>
30- # include < limits.h >
27+
3128#include < cassert>
29+ #include < fstream>
30+ #include < regex>
31+
32+ #include " ../tools.h"
33+ #include " tools.h"
3234
33- void parse_redirectArticles (std::istream& in_stream, redirect_handler handler) {
35+ void parse_redirectArticles (std::istream& in_stream, redirect_handler handler)
36+ {
3437 std::string line;
3538 int line_number = 1 ;
3639 while (std::getline (in_stream, line)) {
3740 std::regex line_regex (" ^([^\\ t]+)\\ t([^\\ t]+)\\ t([^\\ t]+)$" );
3841 std::smatch matches;
3942 if (!std::regex_search (line, matches, line_regex) || matches.size () != 4 ) {
40- throw std::runtime_error (
41- Formatter () << " Invalid line #" << line_number << " : '" << line << " '"
42- );
43+ throw std::runtime_error (Formatter () << " Invalid line #" << line_number
44+ << " : '" << line << " '" );
4345 }
4446
45- Redirect redirect = {
46- .path = matches[1 ].str (),
47- .title = matches[2 ].str (),
48- .target = matches[3 ].str ()
49- };
47+ Redirect redirect = {.path = matches[1 ].str (),
48+ .title = matches[2 ].str (),
49+ .target = matches[3 ].str ()};
5050 handler (redirect);
5151 ++line_number;
5252 }
@@ -55,14 +55,14 @@ void parse_redirectArticles(std::istream& in_stream, redirect_handler handler) {
5555bool isVerbose ();
5656
5757ZimCreatorFS::ZimCreatorFS (std::string _directoryPath)
58- : directoryPath(_directoryPath)
58+ : directoryPath(_directoryPath)
5959{
6060 char buf[PATH_MAX];
6161
6262 if (realpath (directoryPath.c_str (), buf) != buf) {
63- throw std::invalid_argument (
64- Formatter () << " Unable to canonicalize HTML directory path "
65- << directoryPath << " : " << strerror (errno));
63+ throw std::invalid_argument (Formatter ()
64+ << " Unable to canonicalize HTML directory path "
65+ << directoryPath << " : " << strerror (errno));
6666 }
6767
6868 canonical_basedir = buf;
@@ -74,17 +74,13 @@ void ZimCreatorFS::add_redirectArticles_from_file(const std::string& path)
7474
7575 in_stream.open (path.c_str ());
7676 try {
77- parse_redirectArticles (in_stream,
78- [this ](Redirect redirect) {
79- this ->addRedirection (
80- redirect.path ,
81- redirect.title ,
82- redirect.target ,
83- {{zim::writer::HintKeys::FRONT_ARTICLE, 1 }}
84- );
85- }
86- );
87- } catch (const std::runtime_error& e) {
77+ parse_redirectArticles (in_stream, [this ](Redirect redirect) {
78+ this ->addRedirection (redirect.path ,
79+ redirect.title ,
80+ redirect.target ,
81+ {{zim::writer::HintKeys::FRONT_ARTICLE, 1 }});
82+ });
83+ } catch (const std::runtime_error& e) {
8884 std::cerr << e.what () << " \n in redirect file " << path << std::endl;
8985 in_stream.close ();
9086 exit (1 );
@@ -171,14 +167,14 @@ void ZimCreatorFS::visitDirectory(const std::string& path)
171167
172168void ZimCreatorFS::addFile (const std::string& path)
173169{
174- auto url = path.substr (directoryPath.size ()+ 1 );
170+ auto url = path.substr (directoryPath.size () + 1 );
175171 auto mimetype = getMimeTypeForFile (directoryPath, url);
176172 auto title = std::string{};
177173 zim::writer::Hints hints;
178174
179175 std::shared_ptr<zim::writer::Item> item;
180- if ( mimetype.find (" text/html" ) != std::string::npos
181- || mimetype.find (" text/css" ) != std::string::npos) {
176+ if (mimetype.find (" text/html" ) != std::string::npos
177+ || mimetype.find (" text/css" ) != std::string::npos) {
182178 auto content = getFileContent (path);
183179
184180 if (mimetype.find (" text/html" ) != std::string::npos) {
@@ -193,14 +189,17 @@ void ZimCreatorFS::addFile(const std::string& path)
193189 adaptCss (content, url);
194190 }
195191
196- item = zim::writer::StringItem::create (url, mimetype, title, hints, content);
192+ item
193+ = zim::writer::StringItem::create (url, mimetype, title, hints, content);
197194 } else {
198- item = std::make_shared<zim::writer::FileItem>(url, mimetype, title, hints, path);
195+ item = std::make_shared<zim::writer::FileItem>(
196+ url, mimetype, title, hints, path);
199197 }
200198 addItem (item);
201199}
202200
203- void ZimCreatorFS::processSymlink (const std::string& curdir, const std::string& symlink_path)
201+ void ZimCreatorFS::processSymlink (const std::string& curdir,
202+ const std::string& symlink_path)
204203{
205204 /* #102 Links can be 3 different types:
206205 * - dangling (not pointing to a valid file)
@@ -211,26 +210,28 @@ void ZimCreatorFS::processSymlink(const std::string& curdir, const std::string&
211210 if (realpath (symlink_path.c_str (), resolved) != resolved) {
212211 // looping symlinks also fall here: Too many levels of symbolic links
213212 // It also handles dangling symlink: No such file or directory
214- std::cerr << " Unable to resolve symlink " << symlink_path
215- << " : " << strerror (errno) << std::endl;
213+ std::cerr << " Unable to resolve symlink " << symlink_path << " : "
214+ << strerror (errno) << std::endl;
216215 return ;
217216 }
218217
219218 if (isDirectory (resolved)) {
220- std::cerr << " Skip symlink " << symlink_path
221- << " : points to a directory " << std::endl;
219+ std::cerr << " Skip symlink " << symlink_path << " : points to a directory "
220+ << std::endl;
222221 return ;
223222 }
224223
225- if (strncmp (canonical_basedir.c_str (), resolved, canonical_basedir.size ()) != 0
224+ if (strncmp (canonical_basedir.c_str (), resolved, canonical_basedir.size ())
225+ != 0
226226 || resolved[canonical_basedir.size ()] != ' /' ) {
227227 std::cerr << " Skip symlink " << symlink_path
228228 << " : points outside of HTML directory" << std::endl;
229229 return ;
230230 }
231231
232232 std::string source_url = symlink_path.substr (directoryPath.size () + 1 );
233- std::string target_url = std::string (resolved).substr (canonical_basedir.size () + 1 );
233+ std::string target_url
234+ = std::string (resolved).substr (canonical_basedir.size () + 1 );
234235 addRedirection (source_url, " " , target_url);
235236}
236237
@@ -256,11 +257,34 @@ inline std::string removeLocalTagAndParameters(const std::string& url)
256257
257258struct GumboOutputDestructor {
258259 GumboOutputDestructor (GumboOutput* output) : output(output) {}
259- ~GumboOutputDestructor () { gumbo_destroy_output (&kGumboDefaultOptions , output); }
260+ ~GumboOutputDestructor ()
261+ {
262+ gumbo_destroy_output (&kGumboDefaultOptions , output);
263+ }
260264 GumboOutput* output;
261265};
262266
263- std::string ZimCreatorFS::parseAndAdaptHtml (std::string& data, std::string& title, const std::string& url)
/**
 * Tell whether a title is meaningful enough to be kept.
 *
 * A title is accepted when all of the following hold:
 *  1. it is at least `min_length` bytes long (std::string::length() counts
 *     bytes, so a multi-byte UTF-8 character counts for more than one);
 *  2. it is not made only of ASCII digits;
 *  3. it contains at least one "word" byte: an ASCII letter, an ASCII digit,
 *     or a non-ASCII byte (>= 0x80, i.e. part of a multi-byte UTF-8
 *     character). Accepting non-ASCII bytes keeps titles written in
 *     non-Latin scripts from being rejected as "only special characters".
 *
 * @param title       The candidate title.
 * @param min_length  Minimum accepted length, in bytes.
 * @return true if the title passes all three checks, false otherwise.
 */
inline bool isValidTitle(const std::string& title, size_t min_length)
{
  if (title.length() < min_length) {
    return false;
  }

  // Single pass over the bytes; explicit ASCII ranges keep the result
  // independent of the current locale.
  bool onlyDigits = !title.empty();
  bool hasWordByte = false;
  for (const unsigned char c : title) {
    const bool isDigit = (c >= '0' && c <= '9');
    const bool isAsciiLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    const bool isNonAscii = (c >= 0x80);

    if (!isDigit) {
      onlyDigits = false;
    }
    if (isDigit || isAsciiLetter || isNonAscii) {
      hasWordByte = true;
    }
  }

  return !onlyDigits && hasWordByte;
}
284+
285+ std::string ZimCreatorFS::parseAndAdaptHtml (std::string& data,
286+ std::string& title,
287+ const std::string& url)
264288{
265289 GumboOutput* output = gumbo_parse (data.c_str ());
266290 GumboOutputDestructor outputDestructor (output);
@@ -310,7 +334,8 @@ std::string ZimCreatorFS::parseAndAdaptHtml(std::string& data, std::string& titl
310334 auto redirectUrl = computeAbsolutePath (url, decodeUrl (targetUrl));
311335 auto redirectUrlPath = directoryPath + " /" + redirectUrl;
312336 if (!fileExists (redirectUrlPath)) {
313- throw std::runtime_error (" '" + url + " ' HTML redirection target path '"
337+ throw std::runtime_error (" '" + url
338+ + " ' HTML redirection target path '"
314339 + redirectUrlPath + " ' doesn't exist." );
315340 }
316341 return redirectUrl;
@@ -331,29 +356,32 @@ std::string ZimCreatorFS::parseAndAdaptHtml(std::string& data, std::string& titl
331356 std::replace (title.begin (), title.end (), ' _' , ' ' );
332357 }
333358 }
359+ // Extra validation of the title
360+ if (!isValidTitle (title, 3 )) {
361+ std::cerr << " Warning: Generated title is invalid for URL: " << url
362+ << std::endl;
363+ title = " Unknown title" ; // or some other default title
364+ }
334365 }
335366 return " " ;
336367}
337368
338- void ZimCreatorFS::adaptCss (std::string& data, const std::string& url) {
369+ void ZimCreatorFS::adaptCss (std::string& data, const std::string& url)
370+ {
339371 /* Rewrite url() values in the CSS */
340372 size_t startPos = 0 ;
341373 size_t endPos = 0 ;
342374 std::string targetUrl;
343375
344376 while ((startPos = data.find (" url(" , endPos))
345377 && startPos != std::string::npos) {
346-
347378 /* URL delimiters */
348379 endPos = data.find (" )" , startPos);
349- startPos = startPos + (data[startPos + 4 ] == ' \' '
350- || data[startPos + 4 ] == ' "'
351- ? 5
352- : 4 );
353- endPos = endPos - (data[endPos - 1 ] == ' \' '
354- || data[endPos - 1 ] == ' "'
355- ? 1
356- : 0 );
380+ startPos
381+ = startPos
382+ + (data[startPos + 4 ] == ' \' ' || data[startPos + 4 ] == ' "' ? 5 : 4 );
383+ endPos = endPos
384+ - (data[endPos - 1 ] == ' \' ' || data[endPos - 1 ] == ' "' ? 1 : 0 );
357385 targetUrl = data.substr (startPos, endPos - startPos);
358386 std::string startDelimiter = data.substr (startPos - 1 , 1 );
359387 std::string endDelimiter = data.substr (endPos, 1 );
@@ -385,9 +413,9 @@ void ZimCreatorFS::adaptCss(std::string& data, const std::string& url) {
385413 data,
386414 startDelimiter + targetUrl + endDelimiter,
387415 startDelimiter + " data:" + mimeType + " ;base64,"
388- + base64_encode (reinterpret_cast < const unsigned char *>(
389- fontContent.c_str ()),
390- fontContent.length ())
416+ + base64_encode (
417+ reinterpret_cast < const unsigned char *>( fontContent.c_str ()),
418+ fontContent.length ())
391419 + endDelimiter);
392420 } catch (...) {
393421 }
0 commit comments