-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: update parser to correctly parse desired tokens #55
Changes from 3 commits
aa1c7b8
5830e6d
aa9abfc
18757e8
7baff61
aabd1d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -270,6 +270,124 @@ def _extract_signature(obj_sig): | |
return signature, parameters | ||
|
||
|
||
# Given documentation docstring, parse them into summary_info. | ||
def _extract_docstring_info(summary_info, summary, name): | ||
top_summary = "" | ||
|
||
# Initialize known types needing further processing. | ||
var_types = { | ||
':rtype:': 'returns', | ||
':returns:': 'returns', | ||
':type': 'variables', | ||
':param': 'variables', | ||
':raises': 'exceptions', | ||
':raises:': 'exceptions' | ||
} | ||
|
||
# Clean the string by cleaning newlines and backlashes, then split by white space. | ||
config = Config(napoleon_use_param=True, napoleon_use_rtype=True) | ||
# Convert Google style to reStructuredText | ||
parsed_text = str(GoogleDocstring(summary, config)) | ||
|
||
# Trim the top summary but maintain its formatting. | ||
indexes = [] | ||
for types in var_types: | ||
# Ensure that we look for exactly the string we want. | ||
# Adding the extra space for non-colon ending types | ||
# helps determine if we simply ran into desired occurrence | ||
# or if we ran into a similar looking syntax but shouldn't | ||
# parse upon it. | ||
types += ' ' if types[-1] != ':' else '' | ||
if types in parsed_text: | ||
index = parsed_text.find(types) | ||
if index > -1: | ||
# For now, skip on parsing custom fields like attribute | ||
if types == ':type ' and 'attribute::' in parsed_text: | ||
continue | ||
indexes.append(index) | ||
|
||
# If we found types needing further processing, locate its index, | ||
# if we found empty array for indexes, stop processing further. | ||
index = min(indexes) if indexes else 0 | ||
|
||
# Store the top summary separately. | ||
if index == 0: | ||
top_summary = summary | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we return at this point and avoid needing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just |
||
else: | ||
top_summary = parsed_text[:index] | ||
parsed_text = parsed_text[index:] | ||
|
||
# Clean up whitespace and other characters | ||
parsed_text = " ".join(filter(None, re.split(r'\n| |\|\s', parsed_text))).split(" ") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missed this before -- why do we need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the order seemed to have mattered, no need for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for filter, it is slightly different.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If >>> stuff = ["one two", "three four"]
>>> " ".join(stuff).split(" ")
['one', 'two', 'three', 'four'] (no idea what the line does, though, not familiar with the extension 😄 ) |
||
|
||
cur_type = '' | ||
words = [] | ||
arg_name = '' | ||
index = 0 | ||
# Used to track return type and description | ||
r_type, r_descr = '', '' | ||
|
||
# Using counter iteration to easily extract names rather than | ||
# coming up with more complicated stopping logic for each tags. | ||
while index <= len(parsed_text): | ||
word = parsed_text[index] if index < len(parsed_text) else "" | ||
# Check if we encountered specific words. | ||
if word in var_types or index == len(parsed_text): | ||
# Finish processing previous section. | ||
if cur_type: | ||
if cur_type == ':type': | ||
summary_info[var_types[cur_type]][arg_name]['var_type'] = " ".join(words) | ||
elif cur_type == ':param': | ||
summary_info[var_types[cur_type]][arg_name]['description'] = " ".join(words) | ||
elif ":raises" in cur_type: | ||
summary_info[var_types[cur_type]].append({ | ||
'var_type': arg_name, | ||
'description': " ".join(words) | ||
}) | ||
else: | ||
if cur_type == ':rtype:': | ||
r_type = " ".join(words) | ||
else: | ||
r_descr = " ".join(words) | ||
if r_type and r_descr: | ||
summary_info[var_types[cur_type]].append({ | ||
'var_type': r_type, | ||
'description': r_descr | ||
}) | ||
r_type, r_descr = '', '' | ||
|
||
else: | ||
|
||
# If after we processed the top summary and get in this state, | ||
# likely we encountered a type that's not covered above or the docstring | ||
# was formatted badly. This will likely break docfx job later on, should not | ||
# process further. | ||
if word not in var_types: | ||
raise ValueError(f"Encountered wrong formatting, please check docstring for {name}") | ||
|
||
# Reached end of string, break after finishing processing | ||
if index == len(parsed_text): | ||
break | ||
|
||
# Start processing for new section | ||
cur_type = word | ||
if cur_type in [':type', ':param', ':raises', ':raises:']: | ||
index += 1 | ||
arg_name = parsed_text[index][:-1] | ||
# Initialize empty dictionary if it doesn't exist already | ||
if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type: | ||
summary_info[var_types[cur_type]][arg_name] = {} | ||
|
||
# Empty target string | ||
words = [] | ||
else: | ||
words.append(word) | ||
|
||
index += 1 | ||
|
||
return top_summary | ||
|
||
|
||
def _create_datam(app, cls, module, name, _type, obj, lines=None): | ||
""" | ||
Build the data structure for an autodoc class | ||
|
@@ -291,108 +409,6 @@ def _update_friendly_package_name(path): | |
|
||
return path | ||
|
||
def _extract_docstring_info(summary_info, summary): | ||
top_summary = "" | ||
|
||
# Initialize known types needing further processing. | ||
var_types = { | ||
':rtype:': 'returns', | ||
':returns:': 'returns', | ||
':type': 'variables', | ||
':param': 'variables', | ||
':raises': 'exceptions', | ||
':raises:': 'exceptions' | ||
} | ||
|
||
# Clean the string by cleaning newlines and backlashes, then split by white space. | ||
config = Config(napoleon_use_param=True, napoleon_use_rtype=True) | ||
# Convert Google style to reStructuredText | ||
parsed_text = str(GoogleDocstring(summary, config)) | ||
|
||
# Trim the top summary but maintain its formatting. | ||
indexes = [] | ||
for types in var_types: | ||
index = parsed_text.find(types) | ||
if index > -1: | ||
# For now, skip on parsing custom fields like attribute | ||
if types == ':type' and 'attribute::' in parsed_text: | ||
continue | ||
indexes.append(index) | ||
|
||
# If we found types needing further processing, locate its index, | ||
# if we found empty array for indexes, stop processing further. | ||
index = min(indexes) if indexes else 0 | ||
|
||
# Store the top summary separately. | ||
if index == 0: | ||
top_summary = summary | ||
else: | ||
top_summary = parsed_text[:index] | ||
parsed_text = parsed_text[index:] | ||
|
||
# Clean up whitespace and other characters | ||
parsed_text = " ".join(filter(None, re.split(r'\n| |\|\s', parsed_text))).split(" ") | ||
|
||
cur_type = '' | ||
words = [] | ||
arg_name = '' | ||
index = 0 | ||
|
||
# Using counter iteration to easily extract names rather than | ||
# coming up with more complicated stopping logic for each tags. | ||
while index <= len(parsed_text): | ||
word = parsed_text[index] if index < len(parsed_text) else "" | ||
# Check if we encountered specific words. | ||
if word in var_types or index == len(parsed_text): | ||
# Finish processing previous section. | ||
if cur_type: | ||
if cur_type == ':type': | ||
summary_info[var_types[cur_type]][arg_name]['var_type'] = " ".join(words) | ||
elif cur_type == ':param': | ||
summary_info[var_types[cur_type]][arg_name]['description'] = " ".join(words) | ||
elif ":raises" in cur_type: | ||
summary_info[var_types[cur_type]].append({ | ||
'var_type': arg_name, | ||
'description': " ".join(words) | ||
}) | ||
elif cur_type == ':rtype:': | ||
arg_name = " ".join(words) | ||
else: | ||
summary_info[var_types[cur_type]].append({ | ||
'var_type': arg_name, | ||
'description': " ".join(words) | ||
}) | ||
else: | ||
# If after we processed the top summary and get in this state, | ||
# likely we encountered a type that's not covered above or the docstring | ||
# was formatted badly. This will likely break docfx job later on, should not | ||
# process further. | ||
if word not in var_types: | ||
raise ValueError("Encountered wrong formatting, please check docstrings") | ||
|
||
# Reached end of string, break after finishing processing | ||
if index == len(parsed_text): | ||
break | ||
|
||
# Start processing for new section | ||
cur_type = word | ||
if cur_type in [':type', ':param', ':raises', ':raises:']: | ||
index += 1 | ||
arg_name = parsed_text[index][:-1] | ||
# Initialize empty dictionary if it doesn't exist already | ||
if arg_name not in summary_info[var_types[cur_type]] and ':raises' not in cur_type: | ||
summary_info[var_types[cur_type]][arg_name] = {} | ||
|
||
# Empty target string | ||
words = [] | ||
else: | ||
words.append(word) | ||
|
||
index += 1 | ||
|
||
return top_summary | ||
|
||
|
||
if lines is None: | ||
lines = [] | ||
short_name = name.split('.')[-1] | ||
|
@@ -421,7 +437,6 @@ def _extract_docstring_info(summary_info, summary): | |
lines = lines.split("\n") if lines else [] | ||
except TypeError as e: | ||
print("couldn't getdoc from method, function: {}".format(e)) | ||
|
||
|
||
elif _type in [PROPERTY]: | ||
lines = inspect.getdoc(obj) | ||
|
@@ -503,7 +518,7 @@ def _extract_docstring_info(summary_info, summary): | |
|
||
# Extract summary info into respective sections. | ||
if summary: | ||
top_summary = _extract_docstring_info(summary_info, summary) | ||
top_summary = _extract_docstring_info(summary_info, summary, name) | ||
datam['summary'] = top_summary | ||
|
||
if args or sig or summary_info: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from docfx_yaml.extension import find_unique_name | ||
from docfx_yaml.extension import disambiguate_toc_name | ||
from docfx_yaml.extension import _extract_docstring_info | ||
|
||
import unittest | ||
|
||
|
@@ -44,5 +45,128 @@ def test_disambiguate_toc_name(self): | |
|
||
self.assertEqual(yaml_want, yaml_got) | ||
|
||
|
||
def test_extract_docstring_info(self): | ||
|
||
## Test for normal input | ||
top_summary1_want = "\nSimple test for docstring.\n\n" | ||
summary_info1_want = { | ||
'variables': { | ||
'arg1': { | ||
'var_type': 'int', | ||
'description': 'simple description.' | ||
}, | ||
'arg2': { | ||
'var_type': 'str', | ||
'description': 'simple description for `arg2`.' | ||
} | ||
}, | ||
'returns': [ | ||
{ | ||
'var_type': 'str', | ||
'description': 'simple description for return value.' | ||
} | ||
], | ||
'exceptions': [ | ||
{ | ||
'var_type': 'AttributeError', | ||
'description': 'if `condition x`.' | ||
} | ||
] | ||
} | ||
|
||
summary_info1_got = { | ||
'variables': {}, | ||
'returns': [], | ||
'exceptions': [] | ||
} | ||
|
||
summary1 = """ | ||
Simple test for docstring. | ||
|
||
Args: | ||
arg1(int): simple description. | ||
arg2(str): simple description for `arg2`. | ||
|
||
Returns: | ||
str: simple description for return value. | ||
|
||
Raises: | ||
AttributeError: if `condition x`. | ||
""" | ||
|
||
top_summary1_got = _extract_docstring_info(summary_info1_got, summary1, "") | ||
|
||
self.assertEqual(top_summary1_got, top_summary1_want) | ||
self.assertEqual(summary_info1_got, summary_info1_want) | ||
|
||
|
||
## Test for input coming in mixed format. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider creating a separate test case for each There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
summary2 = """ | ||
Simple test for docstring. | ||
|
||
:type arg1: int | ||
:param arg1: simple description. | ||
:param arg2: simple description for `arg2`. | ||
:type arg2: str | ||
|
||
:rtype: str | ||
:returns: simple description for return value. | ||
|
||
:raises AttributeError: if `condition x`. | ||
""" | ||
|
||
summary_info2_got = { | ||
'variables': {}, | ||
'returns': [], | ||
'exceptions': [] | ||
} | ||
|
||
top_summary2_got = _extract_docstring_info(summary_info2_got, summary2, "") | ||
|
||
# Output should be same as test 1 with normal input. | ||
self.assertEqual(top_summary2_got, top_summary1_want) | ||
self.assertEqual(summary_info2_got, summary_info1_want) | ||
|
||
|
||
## Test for parser to correctly scan docstring tokens and not custom fields | ||
summary_info3_want = { | ||
'variables': {}, | ||
'returns': [], | ||
'exceptions': [] | ||
} | ||
|
||
summary3 = """ | ||
Union[int, None]: Expiration time in milliseconds for a partition. | ||
|
||
If :attr:`partition_expiration` is set and <xref:type_> is | ||
not set, :attr:`type_` will default to | ||
:attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. | ||
It could return :param: with :returns as well. | ||
""" | ||
|
||
summary_info3_got = { | ||
'variables': {}, | ||
'returns': [], | ||
'exceptions': [] | ||
} | ||
|
||
# Nothing should change | ||
top_summary3_want = summary3 | ||
|
||
top_summary3_got = _extract_docstring_info(summary_info3_got, summary3, "") | ||
|
||
self.assertEqual(top_summary3_got, top_summary3_want) | ||
self.assertEqual(summary_info3_got, summary_info3_want) | ||
|
||
## Test for incorrectly formmatted docstring raising error | ||
summary4 = """ | ||
Description of docstring which should fail. | ||
|
||
:returns:param: | ||
""" | ||
with self.assertRaises(ValueError): | ||
_extract_docstring_info(summary_info3_got, summary4, "error string") | ||
|
||
if __name__ == '__main__': | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor: extra space at the beginning of these comments?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.