1
0
Fork 0
libreoffice/helpcontent2/to-wiki/getalltitles.py
Daniel Baumann 8e63e14cf6
Adding upstream version 4:25.2.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-22 16:20:04 +02:00

154 lines
3.7 KiB
Python
Executable file

#!/usr/bin/env python
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
import sys
import os
import xml.parsers.expat
# Module-wide registry of the wiki titles generated so far.  is_present()
# searches it and parsexhp() appends to it, which is how make_unique() can
# guarantee that no two help files end up with the same wiki title.
alltitles = []
def is_present(title):
    """Tell whether *title* has already been registered in ``alltitles``.

    Both the candidate and the stored titles are compared after stripping
    surrounding whitespace.

    Returns True on the first match.  Returns False when there is no match,
    and also when *title* or a stored entry has no ``strip()`` method (the
    search stops at the first such entry).
    """
    # Strip the candidate once instead of on every iteration.
    try:
        wanted = title.strip()
    except AttributeError:
        return False

    for known in alltitles:
        try:
            if known.strip() == wanted:
                return True
        except AttributeError:
            # Only a non-string entry is tolerated here; anything else
            # (KeyboardInterrupt, SystemExit, ...) must propagate.
            return False
    return False
def make_unique(title):
    """Return *title*, or *title* with a numeric suffix if it is taken.

    When is_present() reports the title as already used, "_1", "_2", ... are
    appended in turn to the original title until an unused name is found.
    The result is not registered here; that is left to the caller.
    """
    candidate = title
    suffix = 0
    while is_present(candidate):
        suffix += 1
        candidate = title + "_" + str(suffix)
    return candidate
# Ordered [search, replacement] pairs that wiki_text() applies to a help title.
# The order is significant because the pairs are applied one after another:
# "$[officename]" must be handled before the bare brackets, and the quoted
# '"/"' before the bare slash.
replace_text_list = [
    # Product-name placeholders.
    ["$[officename]", "LibreOffice"],
    ["%PRODUCTNAME", "LibreOffice"],
    ["$PRODUCTNAME", "LibreOffice"],
    # Operator symbols written in double quotes get a spelled-out name.
    ['"+"', "Plus"],
    ['"*"', "Star"],
    ['"-"', "Minus"],
    ['"/"', "Slash"],
    ['"^"', "Cap"],
    # Remaining single characters are replaced one by one.
    ["#", "No"],
    [")", "_"],
    ["(", "_"],
    ["]", "_"],
    ["[", "_"],
    ["\\", "_"],
    ["/", "_"],
    ["&", "and"],
    [";", "_"],
]
# [search, replacement] pairs used by readable_text(): every product-name
# placeholder becomes the "{{ProductName}}" markup.
replace_readable_list = [
    ["$[officename]", "{{ProductName}}"],
    ["%PRODUCTNAME", "{{ProductName}}"],
    ["$PRODUCTNAME", "{{ProductName}}"],
]
# [directory name, module name] pairs.  get_module() looks for
# "/<directory name>/" in a file path and returns the matching module name,
# which parsexhp() then uses as the prefix of the wiki title.
modules_list = [
    ["sbasic", "Basic"],
    ["scalc", "Calc"],
    ["schart", "Chart"],
    ["sdraw", "Draw"],
    ["simpress", "Impress"],
    ["smath", "Math"],
    ["swriter", "Writer"],
    ["shared", "Common"],
]
def get_module(text):
    """Map a file path to its module name.

    The entries of ``modules_list`` are tried in order; the first one whose
    directory name occurs in *text* as a full path component
    ("/<directory>/") wins.  An empty string is returned when none matches.
    """
    for entry in modules_list:
        directory, module_name = entry[0], entry[1]
        if ('/' + directory + '/') in text:
            return module_name
    return ""
def replace_text(text, replace_list):
    """Apply every [search, replacement] pair of *replace_list* to *text*.

    The pairs are applied in list order, each one to the result of the
    previous one, and all occurrences of the search string are replaced.
    The resulting string is returned.
    """
    result = text
    for pair in replace_list:
        search, replacement = pair[0], pair[1]
        # str.replace() leaves the string unchanged when nothing matches,
        # so no separate "is it there?" check is needed.
        result = result.replace(search, replacement)
    return result
def wiki_text(text):
    """Convert a help title into text usable as a wiki page name.

    The substitutions of ``replace_text_list`` are applied; if nothing is
    left afterwards, 'LibreOffice' is used instead.  Surrounding whitespace
    is removed from the returned string.
    """
    converted = replace_text(text, replace_text_list)
    if converted == '':
        # Hardcoded fallback for an empty result.
        converted = 'LibreOffice'
    return converted.strip()
def readable_text(text):
    """Return *text* with the substitutions of ``replace_readable_list`` applied."""
    readable = replace_text(text, replace_readable_list)
    return readable
class TitleParser:
    """Collects the character data found inside <title> elements.

    The three callback methods are meant to be installed as the start
    element, end element and character data handlers of an expat parser;
    get_title() then yields the collected text.
    """

    # Class-level defaults; an instance gets its own copy on first assignment.
    title = ''
    is_title = False

    def start_element(self, name, attrs):
        """Start capturing text when a <title> element opens."""
        if name != 'title':
            return
        self.is_title = True

    def end_element(self, name):
        """Stop capturing text when a <title> element closes."""
        if name != 'title':
            return
        self.is_title = False

    def char_data(self, data):
        """Append *data* to the title while capturing is switched on."""
        if not self.is_title:
            return
        self.title += data

    def get_title(self):
        """Return the collected title without surrounding whitespace."""
        collected = self.title
        return collected.strip()
def parsexhp(filename):
    """Derive the wiki title of one help file and register it.

    The module is taken from the file path via get_module(); files that do
    not belong to a known module are ignored.  The text of the <title>
    element(s) is then read with expat and turned into a wiki title of the
    form "<Module>/<Title>": wiki_text() substitutions are applied, spaces
    become underscores, "___" and then "__" are each replaced once by "_",
    leading/trailing underscores are dropped and make_unique() adds a
    numeric suffix if needed.  The final title is appended to ``alltitles``.

    Returns a tuple (filename, wiki title, readable title), or None when the
    file is outside every known module, cannot be parsed (a message is
    written to stderr), or has an empty title.

    Errors from opening or reading the file are not caught here.
    """
    module = get_module(filename)
    if module == '':
        return None

    tp = TitleParser()
    p = xml.parsers.expat.ParserCreate()
    p.StartElementHandler = tp.start_element
    p.EndElementHandler = tp.end_element
    p.CharacterDataHandler = tp.char_data

    # Read raw bytes so that expat applies the encoding declared by the
    # document itself instead of the platform's default text encoding; the
    # context manager closes the file even if reading fails.
    with open(filename, "rb") as xhp:
        buf = xhp.read()

    try:
        # isfinal is left at its default, exactly as before.
        p.Parse(buf)
    except (xml.parsers.expat.ExpatError, LookupError, ValueError):
        # Malformed XML, or an encoding declaration expat cannot handle:
        # skip this file but keep processing the others.
        sys.stderr.write('Cannot parse %s, skipping it\n' % filename)
        return None

    title = tp.get_title()
    if len(title) == 0:
        return None

    readable_title = readable_text(title)
    title = module + '/' + wiki_text(title)
    title = title.replace(' ', '_')
    title = title.replace('___', '_')
    title = title.replace('__', '_')
    title = title.strip('_')
    title = make_unique(title)
    alltitles.append(title)
    return (filename, title, readable_title)
# Main Function
def gettitles(path):
    """Collect the title records of all help files below *path*.

    The directory tree is walked with os.walk(); every file whose name
    contains "xhp" is handed to parsexhp().  The non-None results, i.e.
    (filename, wiki title, readable title) tuples, are returned as a list.

    The list built here is a local one; it is distinct from the module-level
    ``alltitles`` registry that parsexhp() appends to.
    """
    marker = "xhp"
    records = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            if marker not in name:
                continue
            record = parsexhp(root + "/" + name)
            if record is not None:
                records.append(record)
    return records
# vim:set shiftwidth=4 softtabstop=4 expandtab: