@@ -2121,7 +2121,12 @@ class GoogleParser(object):
21212121 if mime :
21222122 title = mime .text + ' ' + title
21232123 url = self .unwrap_link (a .attr ('href' ))
2124- abstract = div_g .select ('.st' ).text .replace ('\n ' , '' )
2124+ matched_keywords = []
2125+ abstract = ''
2126+ for childnode in div_g .select ('.st' ).children :
2127+ if childnode .tag == 'b' and childnode .text != '...' :
2128+ matched_keywords .append ({'phrase' : childnode .text , 'offset' : len (abstract )})
2129+ abstract = abstract + childnode .text .replace ('\n ' , '' )
21252130 try :
21262131 metadata = div_g .select ('.slp' ).text
21272132 metadata = metadata .replace ('\u200e ' , '' ).replace (' - ' , ', ' ).strip ()
@@ -2141,7 +2146,7 @@ class GoogleParser(object):
21412146 continue
21422147 index += 1
21432148 self .results .append (Result (index , title , url , abstract ,
2144- metadata = metadata , sitelinks = sitelinks ))
2149+ metadata = metadata , sitelinks = sitelinks , matches = matched_keywords ))
21452150
21462151 # Showing results for ...
21472152 # Search instead for ...
@@ -2221,6 +2226,7 @@ class Result(object):
22212226 abstract : str
22222227 metadata : str or None
22232228 sitelinks : list
2229+ matches : list
22242230
22252231 Class Variables
22262232 ---------------
@@ -2238,14 +2244,15 @@ class Result(object):
22382244 colors = None
22392245 urlexpand = True
22402246
2241- def __init__ (self , index , title , url , abstract , metadata = None , sitelinks = None ):
2247+ def __init__ (self , index , title , url , abstract , metadata = None , sitelinks = None , matches = None ):
22422248 index = str (index )
22432249 self .index = index
22442250 self .title = title
22452251 self .url = url
22462252 self .abstract = abstract
22472253 self .metadata = metadata
22482254 self .sitelinks = [] if sitelinks is None else sitelinks
2255+ self .matches = [] if matches is None else matches
22492256
22502257 self ._urltable = {index : url }
22512258 subindex = 'a'
@@ -2276,7 +2283,7 @@ class Result(object):
22762283 else :
22772284 print (' %s%-*s %s %s' % (' ' * pre , indent , index + '.' , title , url ))
22782285
2279- def _print_metadata_and_abstract (self , abstract , metadata = None , indent = 5 , pre = 0 ):
2286+ def _print_metadata_and_abstract (self , abstract , metadata = None , matches = None , indent = 5 , pre = 0 ):
22802287 colors = self .colors
22812288 try :
22822289 columns , _ = os .get_terminal_size ()
@@ -2290,6 +2297,15 @@ class Result(object):
22902297 print (' ' * (indent + pre ) + metadata )
22912298
22922299 if colors :
2300+ # Apply the matches from last to first: each insertion of the bold escape sequences lengthens the string, so working backwards keeps the offsets of the earlier matches valid.
2301+ for match in reversed (matches or []):
2302+ abstract = (
2303+ abstract [: match ['offset' ]]
2304+ + '\033 [1m'
2305+ + match ['phrase' ]
2306+ + '\033 [0m'
2307+ + abstract [match ['offset' ] + len (match ['phrase' ]) :]
2308+ )
22932309 print (colors .abstract , end = '' )
22942310 if columns > indent + 1 + pre :
22952311 # Try to fill to columns
@@ -2305,7 +2321,7 @@ class Result(object):
23052321 def print (self ):
23062322 """Print the result entry."""
23072323 self ._print_title_and_url (self .index , self .title , self .url )
2308- self ._print_metadata_and_abstract (self .abstract , metadata = self .metadata )
2324+ self ._print_metadata_and_abstract (self .abstract , metadata = self .metadata , matches = self . matches )
23092325
23102326 for sitelink in self .sitelinks :
23112327 self ._print_title_and_url (sitelink .index , sitelink .title , sitelink .url , pre = 4 )
@@ -2322,6 +2338,8 @@ class Result(object):
23222338 obj ['metadata' ] = self .metadata
23232339 if self .sitelinks :
23242340 obj ['sitelinks' ] = [sitelink .__dict__ for sitelink in self .sitelinks ]
2341+ if self .matches :
2342+ obj ['matches' ] = self .matches
23252343 return obj
23262344
23272345 def urltable (self ):
0 commit comments