66
77from pydoclint .utils .doc import Doc
88
9+ _SPHINX_KEYWORDS = (
10+ ':param ' ,
11+ ':type ' ,
12+ ':raises ' ,
13+ ':return:' ,
14+ ':rtype:' ,
15+ ':yield:' ,
16+ ':ytype:' ,
17+ )
18+
19+ _GOOGLE_KEYWORDS = (
20+ 'Args:' ,
21+ 'Returns:' ,
22+ 'Yields:' ,
23+ 'Raises:' ,
24+ 'Examples:' ,
25+ 'Notes:' ,
26+ )
27+
928
1029def _containsNumpyStylePattern (docstring : str ) -> bool :
1130 # Check if docstring contains numpy-style section headers with dashes.
@@ -31,6 +50,72 @@ def _containsNumpyStylePattern(docstring: str) -> bool:
3150 return bool (re .search (pattern , docstring , re .MULTILINE | re .IGNORECASE ))
3251
3352
def _containsSphinxStylePattern(docstring: str) -> bool:
    """
    Tell whether the docstring has a Sphinx-style field at base indentation.

    A line counts only when its leading whitespace is exactly as wide as the
    docstring's base indentation (as reported by ``_detectDocstringIndent``)
    and its text begins with one of ``_SPHINX_KEYWORDS``.  Blank lines and
    lines at any other indentation are ignored.
    """
    baseIndent = _detectDocstringIndent(docstring)
    # ``str.startswith`` accepts a tuple of prefixes and is False for an empty
    # string, so whitespace-only lines need no separate guard.
    return any(
        len(rawLine) - len(rawLine.lstrip()) == baseIndent
        and rawLine.lstrip().startswith(_SPHINX_KEYWORDS)
        for rawLine in docstring.splitlines()
    )
76+
77+
def _containsGoogleStylePattern(docstring: str) -> bool:
    """
    Tell whether the docstring has a Google-style header at base indentation.

    A line counts only when its leading whitespace is exactly as wide as the
    docstring's base indentation (as reported by ``_detectDocstringIndent``)
    and its text begins with one of ``_GOOGLE_KEYWORDS``.  Blank lines and
    lines at any other indentation are ignored.
    """
    baseIndent = _detectDocstringIndent(docstring)
    # ``str.startswith`` accepts a tuple of prefixes and is False for an empty
    # string, so whitespace-only lines need no separate guard.
    return any(
        len(rawLine) - len(rawLine.lstrip()) == baseIndent
        and rawLine.lstrip().startswith(_GOOGLE_KEYWORDS)
        for rawLine in docstring.splitlines()
    )
97+
98+
99+ def _detectDocstringIndent (docstring : str ) -> int :
100+ """
101+ Detect the leading indentation level of a docstring.
102+
103+ This approximates the column where the opening triple quotes are placed by
104+ measuring the smallest indentation across non-empty lines.
105+ """
106+ indent : int | None = None
107+ for line in docstring .splitlines ():
108+ stripped = line .lstrip ()
109+ if stripped == '' :
110+ continue
111+
112+ currentIndent = len (line ) - len (stripped )
113+ if indent is None or currentIndent < indent :
114+ indent = currentIndent
115+
116+ return 0 if indent is None else indent
117+
118+
34119def parseDocstring (
35120 docstring : str ,
36121 userSpecifiedStyle : str ,
@@ -39,40 +124,51 @@ def parseDocstring(
39124 Parse docstring in all 3 docstring styles and return the one that is parsed
40125 with the most likely style.
41126 """
42- # Check if docstring contains numpy-style section headers with dashes
43- if _containsNumpyStylePattern (docstring ):
44- # Force numpy style parsing when numpy pattern is detected
45- docNumpy , excNumpy = parseDocstringInGivenStyle (docstring , 'numpy' )
46- return docNumpy , excNumpy , userSpecifiedStyle != 'numpy'
47-
48- docNumpy , excNumpy = parseDocstringInGivenStyle (docstring , 'numpy' )
49- docGoogle , excGoogle = parseDocstringInGivenStyle (docstring , 'google' )
50- docSphinx , excSphinx = parseDocstringInGivenStyle (docstring , 'sphinx' )
51-
52- docstrings : dict [str , Doc ] = {
53- 'numpy' : docNumpy ,
54- 'google' : docGoogle ,
55- 'sphinx' : docSphinx ,
56- }
57- docstringSizes : dict [str , int ] = {
58- 'numpy' : docNumpy .docstringSize ,
59- 'google' : docGoogle .docstringSize ,
60- 'sphinx' : docSphinx .docstringSize ,
61- }
62- parsingExceptions : dict [str , ParseError | None ] = {
63- 'numpy' : excNumpy ,
64- 'google' : excGoogle ,
65- 'sphinx' : excSphinx ,
127+ isLikelyNumpy : bool = _containsNumpyStylePattern (docstring )
128+ isLikelyGoogle : bool = _containsGoogleStylePattern (docstring )
129+ isLikelySphinx : bool = _containsSphinxStylePattern (docstring )
130+
131+ if isLikelyNumpy :
132+ # Numpy-style headers with dashes are strong indicators; ignore other
133+ # potential matches when they appear alongside them.
134+ isLikelyGoogle = False
135+ isLikelySphinx = False
136+
137+ likelyStyles = {
138+ 'numpy' : isLikelyNumpy ,
139+ 'google' : isLikelyGoogle ,
140+ 'sphinx' : isLikelySphinx ,
66141 }
67- # Whichever style has the largest docstring size, we think that it is
68- # the actual style that the docstring is written in.
69- maxDocstringSize = max (docstringSizes .values ())
70- styleMismatch : bool = docstringSizes [userSpecifiedStyle ] < maxDocstringSize
71- return (
72- docstrings [userSpecifiedStyle ],
73- parsingExceptions [userSpecifiedStyle ],
74- styleMismatch ,
75- )
142+ matchedStyles = [
143+ style for style , matched in likelyStyles .items () if matched
144+ ]
145+
146+ styleMismatch : bool
147+
148+ if len (matchedStyles ) == 1 :
149+ detectedStyle = matchedStyles [0 ]
150+ if detectedStyle == userSpecifiedStyle :
151+ doc , exc = parseDocstringInGivenStyle (docstring , detectedStyle )
152+ # The Google parser raises hard errors when sections are malformed,
153+ # which is a strong signal the docstring is effectively written in
154+ # a different style. Numpy/Sphinx parsers are more permissive, so
155+ # we surface only the parsing error (DOC001) without flagging a
156+ # style mismatch in those cases.
157+ styleMismatch = exc is not None and detectedStyle == 'google'
158+ return doc , exc , styleMismatch
159+
160+ doc , exc = parseDocstringInGivenStyle (docstring , detectedStyle )
161+ styleMismatch = True
162+ return doc , exc , styleMismatch
163+
164+ if len (matchedStyles ) == 0 :
165+ doc , exc = parseDocstringInGivenStyle (docstring , userSpecifiedStyle )
166+ styleMismatch = False
167+ return doc , exc , styleMismatch
168+
169+ doc , exc = parseDocstringInGivenStyle (docstring , userSpecifiedStyle )
170+ styleMismatch = True
171+ return doc , exc , styleMismatch
76172
77173
78174def parseDocstringInGivenStyle (
0 commit comments