#!/usr/bin/python3
# $Id: htmlhelp-qthelp.py $
## @file
# A python script to create a .qhp file out of a given htmlhelp
# folder. Lots of things about the said folder is assumed. Please
# see the code and inlined comments.

import sys, getopt
import os.path
import re
import codecs
import logging

if sys.version_info >= (3, 0):
    from html.parser import HTMLParser
else:
    from HTMLParser import HTMLParser


__copyright__ = \
"""
Copyright (C) 2006-2023 Oracle and/or its affiliates.

This file is part of VirtualBox base platform packages, as
available from https://www.virtualbox.org.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation, in version 3 of the
License.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses>.

SPDX-License-Identifier: GPL-3.0-only
"""

# NOTE(review): this part of the file reached review with its line breaks
# collapsed and every <...> span stripped from comments and string literals.
# The markup literals below (<files>, <file>, <keywords>, <keyword>, <section>)
# and the middle of parse_object_tag() are reconstructed from the surviving
# fragments -- confirm them against the upstream copy of this script.

# number of opened and not yet closed section tags of toc section
open_section_tags = 0

# html file names listed after the [FILES] marker of htmlhelp.hhp. Filled in
# by create_html_list() and read by create_keywords_section().
html_files = []


# HTML parser which records the value of the 'name' attribute of every <a>
# start tag it is fed. The collected values are available in self.a_tag.
class _AnchorNameCollector(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self.a_tag = []

    def handle_starttag(self, tag, attributes):
        if tag != 'a':
            return
        for attr_name, attr_value in attributes:
            # an attribute without a value is reported as None and cannot be
            # concatenated into a keyword line, so it is skipped
            if attr_name == 'name' and attr_value is not None:
                self.a_tag.append(attr_value)


# use html_parser stuff to collect <a name=...> tags
# Reads every file listed in the global html_files from 'folder' (decoded as
# iso-8859-1) and returns a list of lines: an opening <keywords> tag, one
# <keyword> line per collected anchor name and a closing </keywords> tag.
def create_keywords_section(folder):
    keywords_section_lines = ['<keywords>']
    for html_file_name in html_files:
        full_html_path = os.path.join(folder, html_file_name)
        # 'with' makes sure the handle is closed even if reading fails
        with codecs.open(full_html_path, encoding='iso-8859-1') as html_file:
            file_content = html_file.read()

        parser = _AnchorNameCollector()
        parser.feed(file_content)
        for k in parser.a_tag:
            # NOTE(review): attribute layout of this literal is reconstructed
            line = '<keyword name="' + k + '" id="' + k + '" ref="' + html_file_name + '#' + k + '"/>'
            keywords_section_lines.append(line)
    keywords_section_lines.append('</keywords>')
    return keywords_section_lines


# find the png files under /images folder and create a part of the
# qhelp project file with <file> tags
# Returns one '<file>images/NAME</file>' line per directory entry of
# 'folder'/images. No extension filter is applied, every entry is listed.
# Logs an error and returns an empty list if the sub folder does not exist.
def create_image_list(folder):
    image_folder_name = 'images'
    image_files_list = []
    # Look for 'images' sub folder
    full_folder_path = os.path.join(folder, image_folder_name)
    if not os.path.isdir(full_folder_path):
        logging.error('Image subfolder "%s" is not found under "%s".', image_folder_name, folder)
        return image_files_list
    for f in os.listdir(full_folder_path):
        image_files_list.append('<file>' + image_folder_name + '/' + f + '</file>')
    return image_files_list


# open htmlhelp.hhp files and read the list of html files from there
# Returns one '<file>NAME</file>' line per line containing '.html' that comes
# after the [FILES] marker, and appends each NAME to the global html_files.
# Logs an error and returns an empty list if htmlhelp.hhp is not in 'folder'.
def create_html_list(folder):
    file_name = 'htmlhelp.hhp'
    html_file_lines = []
    full_path = os.path.join(folder, file_name)
    if not os.path.isfile(full_path):
        logging.error('Could not find the file "%s" in "%s"', file_name, folder)
        return html_file_lines
    with codecs.open(full_path, encoding='iso-8859-1') as hhp_file:
        lines = hhp_file.readlines()

    # first search for the [FILES] marker then collect .html lines
    marker_found = False
    for line in lines:
        if '[FILES]' in line:
            marker_found = True
            continue
        if not marker_found:
            continue
        if '.html' in line:
            # codecs.open() does not translate line endings, so a CRLF file
            # would otherwise leave a '\r' glued to the file name
            html_name = line.strip('\r\n')
            html_file_lines.append('<file>' + html_name + '</file>')
            html_files.append(html_name)
    return html_file_lines


# Builds the <files> section: the image entries followed by the html entries,
# wrapped in <files> ... </files>.
def create_files_section(folder):
    files_section_lines = ['<files>']
    files_section_lines += create_image_list(folder)
    files_section_lines += create_html_list(folder)
    files_section_lines.append('</files>')
    return files_section_lines


# Returns the text between 'value="' and the next double quote in 'line', or
# an empty string if either of them is missing.
def parse_param_tag(line):
    label = 'value="'
    start = line.find(label)
    if start == -1:
        return ''
    start += len(label)
    end = line.find('"', start)
    if end == -1:
        return ''
    return line[start:end]


# look at next two lines. they are supposed to look like the following
#      <param name="Name" value="Oracle VM VirtualBox">
#      <param name="Local" value="index.html">
# parse out value fields and return
# title="Oracle VM VirtualBox" ref="index.html
# Returns the opening <section> tag built from those two values, or an empty
# string (after logging a warning) if the two lines are missing, are not the
# expected param tags, or one of the values is empty.
def parse_object_tag(lines, index):
    global open_section_tags
    result = ''
    # lines[index + 1] and lines[index + 2] are read below, so both of them
    # have to exist; '>=' (not '>') keeps index + 2 inside the list
    if index + 2 >= len(lines):
        logging.warning('Not enough tags after this one "%s"', lines[index])
        return result
    # NOTE(review): everything from here down to the 'else' was lost together
    # with the stripped markup; patterns and the first warning text are
    # reconstructed -- confirm against upstream
    if    not re.match(r'^\s*<param name="Name" value=', lines[index + 1], re.IGNORECASE) \
       or not re.match(r'^\s*<param name="Local" value=', lines[index + 2], re.IGNORECASE):
        logging.warning('Skipping the line "%s" since next two tags are supposed to be param tags', lines[index])
        return result
    title = parse_param_tag(lines[index + 1])
    ref = parse_param_tag(lines[index + 2])
    if title and ref:
        # presumably balanced by the code emitting the closing section tags;
        # that code is not readable in the reviewed part -- verify
        open_section_tags += 1
        result = '<section title="' + title + '" ref="' + ref + '">'
    else:
        logging.warning('Title or ref part is empty for the tag "%s"', lines[index])
    return result

# parse any string other than one starting with <OBJECT

' elif re.match(r'^\s*' return '' def parse_line(lines, index): result='' # if the line starts with