tools: rework generating documentation from libnm meta data

With the given input, this produces *exactly* the same XML as before. - the parsing is now stricter (and thus the code is more verbose). No funny stuff, get the annotations correct. - on parsing errors, we now log the affected lines. - "nm-setting-ip-config.c" is a base class. Previously it was ignored, and for the moment we still do that. Next, we will also allow describing properties there. - prepare the code to better preserve whitespace, indentation and line wrapping. In particular, to honor a blank line as a line break and to support paragraphs. This is not enabled yet, so that the output can still be compared to the previous one, but it will be turned on with a small patch next. - the code makes it simple to promote the XML attributes to nodes. Attributes aren't great, let's write XML nodes later. We will only need to adjust the "keywords" dictionary for that, but this change will require changes to the entire chain of tools.
2022-08-29 10:37:06 +02:00
parent 604bb9f9fa
commit 8fc7b6df12
1 changed files with 384 additions and 115 deletions
--- a/tools/generate-docs-nm-property-infos.py
+++ b/tools/generate-docs-nm-property-infos.py
@@ -1,143 +1,412 @@
 #!/usr/bin/env python
 # SPDX-License-Identifier: LGPL-2.1-or-later
 import os
 import re
 import sys
 import collections
 import xml.etree.ElementTree as ET
-def get_setting_name(one_file):
+class LineError(Exception):
-    setting_name = ""
+    def __init__(self, line_no, msg):
-    assert re.match(r".*/libnm-core-impl/nm-setting-.*\.c$", one_file)
+        Exception.__init__(self, msg)
-    header_path = one_file.replace("libnm-core-impl", "libnm-core-public")
+        self.line_no = line_no
-    header_path = header_path.replace(".c", ".h")
+
 # Debug verbosity used by dbg(). It is read once from the environment
 # variable NM_DEBUG_GENERATE_DOCS and stays at 0 if the variable is unset
 # or its value cannot be converted to an integer.
 _dbg_level = 0
 try:
    _dbg_level = int(os.getenv("NM_DEBUG_GENERATE_DOCS", 0))
 except Exception:
    # Best effort: any conversion failure keeps the default level of 0.
    pass
 def dbg(msg, level=1):
    """Print msg if the configured debug level is at least level."""
    if _dbg_level < level:
        return
    print(msg)
 def iter_unique(iterable, default=None):
    """Return the single element of iterable, or default if it is empty.

    Asserts that iterable does not yield more than one element.
    """
    result = default
    for position, element in enumerate(iterable):
        # Only the very first element is acceptable.
        assert position == 0
        result = element
    return result
 def xnode_get_or_create(root_node, node_name, name):
    """Get the node "<{node_name} name={name} .../>" from root_node.

    The node is searched among root_node.findall(node_name). If there is
    none, a new sub element with that name attribute is added to root_node.

    Returns a tuple (node, created), where created is True if the node
    was added by this call.
    """
    candidates = (
        child for child in root_node.findall(node_name) if child.attrib["name"] == name
    )
    existing = iter_unique(candidates)
    if existing is not None:
        return existing, False
    return ET.SubElement(root_node, node_name, name=name), True
 def get_setting_names(source_file):
    m = re.match(r"^(.*)/libnm-core-impl/(nm-setting-[^/]*)\.c$", source_file)
    assert m
    path_prefix, file_base = (m.group(1), m.group(2))
    if file_base == "nm-setting-ip-config":
        # Special case ip-config, which is a base class.
        return None
    header_file = "%s/libnm-core-public/%s.h" % (path_prefix, file_base)
    try:
-        header_reader = open(header_path, "r")
+        f = open(header_file, "r")
    except OSError:
-        print("Can not open header file: %s" % (header_path))
+        raise Exception(
-        exit(1)
+            'Can not open header file "%s" for "%s"' % (header_file, source_file)
        )
-    line = header_reader.readline()
+    with f:
-    while line != "":
+        for line in f:
-        setting_name_found = re.search(r"NM_SETTING_.+SETTING_NAME\s+\"(\S+)\"", line)
+            m = re.search(r"^#define +NM_SETTING_.+SETTING_NAME\s+\"(\S+)\"$", line)
-        if setting_name_found:
+            if m:
-            setting_name = setting_name_found.group(1)
+                return m.group(1)
-            break
+
-        line = header_reader.readline()
+    raise Exception(
-    header_reader.close()
+        'Can\'t find setting name in header file "%s" for "%s"'
-    return setting_name
+        % (header_file, source_file)
    )
-def scan_doc_comments(plugin, setting_node, file, start_tag, end_tag):
+def get_file_infos(source_files):
-    data = []
+    for source_file in source_files:
-    push_flag = 0
+        setting_name = get_setting_names(source_file)
-    try:
+        if setting_name:
-        file_reader = open(file, "r")
+            yield setting_name, source_file
    except OSError:
        print("Can not open file: %s" % (file))
        exit(1)
    line = file_reader.readline()
    while line != "":
        if start_tag in line:
            push_flag = 1
        elif end_tag in line and push_flag == 1:
            push_flag = 0
            parsed_data = process_data(data)
            if parsed_data:
                write_data(setting_node, parsed_data)
            data = []
        elif push_flag == 1:
            data.append(line)
        line = file_reader.readline()
    file_reader.close()
    return
-keywords = [
+KEYWORD_XML_TYPE_NESTED = "nested"
-    "property",
+KEYWORD_XML_TYPE_NODE = "node"
-    "variable",
+KEYWORD_XML_TYPE_ATTR = "attr"
-    "format",
+
-    "values",
+keywords = collections.OrderedDict(
-    "default",
+    [
-    "example",
+        ("property", KEYWORD_XML_TYPE_ATTR),
-    "description",
+        ("variable", KEYWORD_XML_TYPE_ATTR),
-    "description-docbook",
+        ("format", KEYWORD_XML_TYPE_ATTR),
-]
+        ("values", KEYWORD_XML_TYPE_ATTR),
-kwd_first_line_re = re.compile(
+        ("default", KEYWORD_XML_TYPE_ATTR),
-    r"^\s*\**\s+({}):\s+(.*?)\s*$".format("|".join(keywords))
+        ("example", KEYWORD_XML_TYPE_ATTR),
        ("description", KEYWORD_XML_TYPE_ATTR),
        ("description-docbook", KEYWORD_XML_TYPE_NESTED),
    ]
 )
 kwd_more_line_re = re.compile(r"^\s*\**\s+(.*?)\s*$")
-def process_data(data):
+def keywords_allowed(tag, keyword):
-    parsed_data = {}
+    # certain keywords might not be valid for some tags.
-    if not data:
+    # Currently, all of them are always valid.
-        return parsed_data
+    assert keyword in keywords
-    keyword = ""
+    return True
-    for line in data:
+
-        kwd_first_line_found = kwd_first_line_re.search(line)
+
-        if kwd_first_line_found:
+def write_data(tag, setting_node, line_no, parsed_data):
-            keyword = kwd_first_line_found.group(1)
+
-            if keyword == "description-docbook":
+    for k in parsed_data.keys():
-                value = kwd_first_line_found.group(2) + "\n"
+        assert keywords_allowed(tag, k)
-            else:
+        assert k in keywords
-                value = kwd_first_line_found.group(2) + " "
+
-            parsed_data[keyword] = value
+    name = parsed_data["property"]
    property_node, created = xnode_get_or_create(setting_node, "property", name)
    if not created:
        raise LineError(line_no, 'Duplicate property <property name="%s"...' % (name,))
    for k, xmltype in keywords.items():
        if k == "property":
            continue
-        kwd_more_line_found = kwd_more_line_re.search(line)
+
-        if kwd_more_line_found:
+        v = parsed_data.get(k, None)
-            if not keyword:
+        if v is None:
-                print("Extra mess in a comment: %s" % (line))
+            if k == "variable":
-                exit(1)
+                v = name
-            if keyword == "description-docbook":
+            elif k == 'description-docbook':
-                value = kwd_more_line_found.group(1) + "\n"
+                continue
            else:
-                value = kwd_more_line_found.group(1) + " "
+                v = ""
-            parsed_data[keyword] += value
+
-    for keyword in keywords:
+        if xmltype == KEYWORD_XML_TYPE_NESTED:
-        if keyword == "variable" and keyword not in parsed_data:
+            # Set as XML nodes. The input data is XML itself.
-            parsed_data[keyword] = parsed_data["property"]
+            des = ET.fromstring("<%s>%s</%s>" % (k, v, k))
-        elif keyword not in parsed_data:
+            property_node.append(des)
-            parsed_data[keyword] = ""
+        elif xmltype == KEYWORD_XML_TYPE_NODE:
-    for key in parsed_data.keys():
+            node = ET.SubElement(property_node, k)
-        parsed_data[key] = parsed_data[key].rstrip()
+            node.text = v
        elif xmltype == KEYWORD_XML_TYPE_ATTR:
            property_node.set(k, v)
        else:
            assert False
 # Matches optional spaces, "* ", a keyword made of lowercase letters, digits
 # and dashes, then ": " and the remaining text of the line.
 # NOTE(review): no added or unchanged line visible in this diff references
 # these two compiled patterns -- confirm whether they are still needed.
 kwd_first_line_re = re.compile(r"^ *\* ([-a-z0-9]+): (.*)$")
 # Matches optional spaces, "*", the spaces following it (captured) and the
 # rest of the line without trailing whitespace.
 kwd_more_line_re = re.compile(r"^ *\*( *)(.*?)\s*$")
 def parse_data(tag, line_no, lines):
    """Parse the lines of one documentation comment into a dictionary.

    tag: the tag of the comment block; it is passed to keywords_allowed().
    line_no: the number of the line preceding lines[0]. It is incremented
      for each processed line and only used for error reporting.
    lines: non-empty list of lines without newline. Each line must be
      literally "     *", optionally followed by a space and text.

    Returns a dictionary mapping each keyword to its collected text.
    Allowed keywords that do not appear in lines are set to None.

    Raises LineError if a line is not formatted as expected, if a keyword
    is unknown, duplicated or not allowed, or if "property:" is missing
    or is not the first keyword.
    """
    assert lines
    parsed_data = {}
    keyword = ""
    indent = None
    for line in lines:
        assert "\n" not in line
        line_no += 1
        m = re.search(r"^     \*(| .*)$", line)
        if not m:
            raise LineError(line_no, 'Invalid formatted line "%s"' % (line,))
        content = m.group(1)
        m = re.search("^ ([-a-z0-9]+):(.*)$", content)
        text_keyword_started = None
        if m:
            # The line starts a new "keyword:" section.
            keyword = m.group(1)
            if keyword in parsed_data:
                raise LineError(line_no, 'Duplicated keyword "%s"' % (keyword,))
            text = m.group(2)
            text_keyword_started = text
            if text:
                if text[0] != " " or len(text) == 1:
                    raise LineError(line_no, 'Invalid formatted line "%s"' % (line,))
                text = text[1:]
            if keyword not in keywords:
                # Report unknown keywords with the line number, instead of
                # tripping over the assertion in keywords_allowed().
                raise LineError(line_no, 'Unknown keyword "%s"' % (keyword,))
            if not keywords_allowed(tag, keyword):
                raise LineError(line_no, 'Invalid key "%s" for %s' % (keyword, tag))
            if parsed_data and keyword == "property":
                raise LineError(line_no, 'The "property:" keyword must be first')
            parsed_data[keyword] = text
            # Each keyword section determines its own indentation.
            indent = None
        else:
            # The line continues the text of the current keyword.
            if content == "":
                text = ""
            elif content[0] == " " and len(content) > 1:
                text = content[1:]
                assert text
                if indent is None:
                    # The first continuation line defines the indentation
                    # that all following lines of this keyword must share.
                    indent = re.search("^( *)", text).group(1)
                if not text.startswith(indent):
                    raise LineError(line_no, 'Unexpected indention in "%s"' % (line,))
                text = text[len(indent) :]
            else:
                raise LineError(line_no, 'Unexpected line "%s"' % (line,))
            if not keyword:
                raise LineError(line_no, "Expected data in comment: %s" % (line))
            if text and text[0] == "\\":
                # NOTE(review): a leading backslash currently trips this
                # assertion, the stripping below is only reached if asserts
                # are disabled. Presumably reserved for escaping -- confirm.
                assert False
                text = text[1:]
            if separator == " " and text == "":
                # No separator to add. This is a blank line
                pass
            else:
                parsed_data[keyword] = parsed_data[keyword] + separator + text.strip()
        # Determine the separator for appending the next continuation line.
        if keywords[keyword] == KEYWORD_XML_TYPE_NESTED:
            # This is plain XML. The lines are joined by newlines.
            separator = "\n"
        elif text_keyword_started == "":
            # If the previous line was just "tag:$", we don't need a separator
            # the next time.
            separator = ""
        elif not text:
            # A blank line is used to mark a line break, while otherwise
            # lines are joined by space.
            separator = "  "
        else:
            separator = " "
    if "property" not in parsed_data:
        raise LineError(line_no, 'Missing "property:" tag')
    # Make sure that every allowed keyword has an entry in the result.
    for keyword in keywords.keys():
        if not keywords_allowed(tag, keyword):
            continue
        if keyword not in parsed_data:
            parsed_data[keyword] = None
    return parsed_data
-def write_data(setting_node, parsed_data):
+def process_setting(tag, root_node, source_file, setting_name):
    property_node = ET.SubElement(setting_node, "property")
    property_node.set("name", parsed_data["property"])
    property_node.set("variable", parsed_data["variable"])
    property_node.set("format", parsed_data["format"])
    property_node.set("values", parsed_data["values"])
    property_node.set("default", parsed_data["default"])
    property_node.set("example", parsed_data["example"])
    property_node.set("description", parsed_data["description"])
    if parsed_data["description-docbook"]:
        des = ET.fromstring(
            "<description-docbook>"
            + parsed_data["description-docbook"]
            + "</description-docbook>"
        )
        property_node.append(des)
    dbg(
        "> > tag:%s, source_file:%s, setting_name:%s" % (tag, source_file, setting_name)
    )
-if len(sys.argv) < 4:
+    start_tag = "---" + tag + "---"
-    print("Usage: %s [plugin] [output-xml-file] [srcfiles]" % (sys.argv[0]))
+    end_tag = "---end---"
    exit(1)
-argv = list(sys.argv[1:])
+    setting_node, created = xnode_get_or_create(root_node, "setting", setting_name)
-plugin, output, source_files = argv[0], argv[1], argv[2:]
+    if created:
 start_tag = "---" + plugin + "---"
 end_tag = "---end---"
 root_node = ET.Element("nm-setting-docs")
 for one_file in source_files:
    setting_name = get_setting_name(one_file)
    if setting_name:
        setting_node = ET.SubElement(root_node, "setting", name=setting_name)
        setting_node.text = "\n"
        scan_doc_comments(plugin, setting_node, one_file, start_tag, end_tag)
-ET.ElementTree(root_node).write(output)
+    try:
        f = open(source_file, "r")
    except OSError:
        raise Exception("Can not open file: %s" % (source_file))
    lines = None
    with f:
        line_no = 0
        just_had_end_tag = False
        line_no_start = None
        for line in f:
            line_no += 1
            if line and line[-1] == "\n":
                line = line[:-1]
            if just_had_end_tag:
                # After the end-tag, we still expect one particular line. Be strict about
                # this.
                just_had_end_tag = False
                if line != "     */":
                    raise LineError(
                        line_no,
                        'Invalid end tag "%s". Expects literally "     */" after end-tag'
                        % (line,),
                    )
            elif start_tag in line:
                if line != "    /* " + start_tag:
                    raise LineError(
                        line_no,
                        'Invalid start tag "%s". Expects literally "    /* %s"'
                        % (line, start_tag),
                    )
                if lines is not None:
                    raise LineError(
                        line_no, 'Invalid start tag "%s", missing end-tag' % (line,)
                    )
                lines = []
                line_no_start = line_no
            elif end_tag in line and lines is not None:
                if line != "     * " + end_tag:
                    raise LineError(line_no, 'Invalid end tag: "%s"' % (line,))
                parsed_data = parse_data(tag, line_no_start, lines)
                if not parsed_data:
                    raise Exception('invalid data: line %s, "%s"' % (line_no, lines))
                dbg("> > > property: %s" % (parsed_data["property"],))
                if _dbg_level > 1:
                    for keyword in sorted(parsed_data.keys()):
                        v = parsed_data[keyword]
                        if v is not None:
                            v = '"%s"' % (v,)
                        dbg(
                            "> > > > [%s] (%s) = %s" % (keyword, keywords[keyword], v),
                            level=2,
                        )
                write_data(tag, setting_node, line_no_start, parsed_data)
                lines = None
            elif lines is not None:
                lines.append(line)
        if lines is not None or just_had_end_tag:
            raise LineError(line_no_start, "Unterminated start tag")
 def process_settings_docs(tag, output, source_files):
    """Generate the "nm-setting-docs" XML for source_files and write it to output.

    tag: the tag of the documentation comments to process.
    output: passed to ElementTree.write() as destination of the XML.
    source_files: the source files, as accepted by get_file_infos().

    Raises Exception, with the file name (and the line number, if known)
    in the message, if processing a source file fails. The original error
    is chained as the cause.
    """
    dbg("> tag:%s, output:%s" % (tag, output))
    root_node = ET.Element("nm-setting-docs")
    for setting_name, source_file in get_file_infos(source_files):
        try:
            process_setting(tag, root_node, source_file, setting_name)
        except LineError as e:
            raise Exception(
                "Error parsing %s, line %s (tag:%s, setting_name:%s): %s"
                % (source_file, e.line_no, tag, setting_name, str(e))
            ) from e
        except Exception as e:
            raise Exception(
                "Error parsing %s (tag:%s, setting_name:%s): %s"
                % (source_file, tag, setting_name, str(e))
            ) from e
    ET.ElementTree(root_node).write(output)
 def main():
    """Run the tool with the tag, output file and source files from sys.argv.

    Prints a usage message and exits with status 1 if fewer than three
    arguments are given.
    """
    if len(sys.argv) < 4:
        print("Usage: %s [tag] [output-xml-file] [srcfiles...]" % (sys.argv[0]))
        # exit() is a helper of the "site" module and might not be
        # available; sys.exit() always is.
        sys.exit(1)
    process_settings_docs(
        tag=sys.argv[1], output=sys.argv[2], source_files=sys.argv[3:]
    )
 if __name__ == "__main__":
    main()
 ###############################################################################
 # Tests
 ###############################################################################
 def setup_module():
    """Import pytest and bind it as a module-level global.

    Presumably called by pytest as the module setup hook, so that pytest
    is only required when running the tests -- confirm.
    """
    global pytest
    import pytest
 def t_srcdir():
    """Return the absolute path of the parent of the directory of this file."""
    # Use os.path.join() rather than appending "/..": if __file__ has no
    # directory part, the concatenation would yield "/..", which is the
    # file system root.
    return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
 def t_setting_c(name):
    """Return the path of the file nm-setting-{name}.c below t_srcdir()."""
    srcdir = t_srcdir()
    return srcdir + f"/src/libnm-core-impl/nm-setting-{name}.c"
 def test_file_location():
    """Check that this script and the setting sources are found below t_srcdir()."""
    srcdir = t_srcdir()
    this_script = os.path.abspath(__file__)
    assert srcdir + "/tools/generate-docs-nm-property-infos.py" == this_script
    connection_c = srcdir + "/src/libnm-core-impl/nm-setting-connection.c"
    assert os.path.isfile(connection_c)
    assert os.path.isfile(t_setting_c("ip-config"))
 def test_get_setting_names():
    """Check the setting names that get_setting_names() finds for some sources."""
    assert get_setting_names(t_setting_c("connection")) == "connection"
    assert get_setting_names(t_setting_c("ip4-config")) == "ipv4"
    # The ip-config base class has no setting name; compare by identity.
    assert get_setting_names(t_setting_c("ip-config")) is None
 def test_get_file_infos():
    """Check that get_file_infos() yields (setting_name, source_file) pairs.

    The "ip-config" source is passed in, but is not expected in the result.
    """
    names = ["connection", "ip-config", "ip4-config", "proxy", "wired"]
    expected = [
        ("connection", t_setting_c("connection")),
        ("ipv4", t_setting_c("ip4-config")),
        ("proxy", t_setting_c("proxy")),
        ("802-3-ethernet", t_setting_c("wired")),
    ]
    actual = list(get_file_infos([t_setting_c(n) for n in names]))
    assert expected == actual
 def test_process_setting():
    root_node = ET.Element("nm-setting-docs")
    process_setting("nmcli", root_node, t_setting_c("connection"), "connection")