88from time import sleep
99from tqdm import tqdm
1010
11- ## -------------------------CREATING THE "LOADING" BAR--------------------------
12-
13- pbar = tqdm (desc = "Pages scrapped" , ascii = True , mininterval = 0.3 , unit = " pages" )
14-
1511## ----------------------------CREATING THE SPIDER------------------------------
1612
1713class ListSpider (scrapy .Spider ):
@@ -20,26 +16,36 @@ class ListSpider(scrapy.Spider):
2016 'LOG_LEVEL' : 'ERROR' ,
2117 }
2218
23- ## ----------------------------GETTING GAMES URLs -------------------------------
19+ ## ----------------------------DEFINING THE SPIDER ------------------------------
2420
2521 # We define the arguments, more information in PR #16
def __init__(self, start_page=0, delay=3, items_per_page=100, **kwargs):
    """Configure the spider from its arguments.

    The values may arrive as strings, so each one is converted with
    ``int()`` exactly once and the converted value is used everywhere.

    Args:
        start_page: Page index to start listing from. It is placed in the
            ``page`` query parameter of the start URL, and ``parse``
            compares it against the displayed page number minus one.
        delay: Stored as an integer on ``self.delay`` for later use.
        items_per_page: Upper bound on the number of games ``parse``
            yields for a single listing page.
        **kwargs: Forwarded unchanged to the parent initializer.

    Raises:
        ValueError: If an argument cannot be converted by ``int()``.
    """
    # Normalise first so the requested URL and ``self.start_page`` can never
    # disagree (e.g. "05" or 2.0 would otherwise end up verbatim in the URL
    # while the attribute holds 5 or 2).
    self.start_page = int(start_page)
    self.delay = int(delay)
    self.items_per_page = int(items_per_page)
    self.start_urls = [
        'https://www.metacritic.com/browse/games/score/metascore/all/all/'
        f'filtered?page={self.start_page}'
    ]
    super().__init__(**kwargs)
32-
33- # Get the last page number
34- # last_page_num = int(('.last_page a ::text').get())
29+
30+ ## ----------------------------GETTING GAMES URLs-------------------------------
3531
3632 def parse (self , response ):
33+ ## Creating the loading bar!
34+ # We need the last page number so the bar can show an ETA
35+ last_page_num = int (response .css ('.last_page a ::text' ).get ())
36+ # We check which page we are on, as the loading bar only needs to be created on the starting page
37+ current_page = int (response .css ('.active_page span ::text' ).get ()) - 1
38+ if current_page == self .start_page :
39+ self .pbar = tqdm (total = last_page_num - self .start_page , desc = "Listing games" , ascii = True , unit = "page" )
40+
41+ ## The scraping
42+ # System for items_per_page to work
3743 num_of_games_on_page = len (response .css ('.product_wrap > .product_title a::attr(href)' ).getall ())
3844 end = num_of_games_on_page if num_of_games_on_page <= self .items_per_page else self .items_per_page
3945
4046 for x in range (0 , end ):
4147 yield {
42- #Extracts the link of the game
48+ #Extracts the link of the game and stores it
4349 'f' : response .css ('.product_wrap > .product_title a::attr(href)' )[x ].get ()
4450 }
4551
@@ -49,7 +55,7 @@ def parse(self, response):
4955 NEXT_PAGE_SELECTOR = '.next a ::attr(href)'
5056 next_page = response .css (NEXT_PAGE_SELECTOR ).get ()
5157 ## Increase the completed value
52- pbar .update (1 )
58+ self . pbar .update (1 )
5359
5460 # Travelling to the next page :D
5561 if next_page :
0 commit comments