Skip to content
This repository was archived by the owner on Nov 3, 2020. It is now read-only.

Commit 9a90bc0

Browse files
authored
Loading bar
An upgraded version of #29, except that this time we have a visual loading bar! This should probably mark the 1.0.0 release.
1 parent 0089d39 commit 9a90bc0

File tree

1 file changed

+16
-10
lines changed

1 file changed

+16
-10
lines changed

games.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@
88
from time import sleep
99
from tqdm import tqdm
1010

11-
## -------------------------CREATING THE "LOADING" BAR--------------------------
12-
13-
pbar = tqdm(desc="Pages scrapped", ascii=True, mininterval=0.3, unit=" pages")
14-
1511
## ----------------------------CREATING THE SPIDER------------------------------
1612

1713
class ListSpider(scrapy.Spider):
@@ -20,26 +16,36 @@ class ListSpider(scrapy.Spider):
2016
'LOG_LEVEL': 'ERROR',
2117
}
2218

23-
## ----------------------------GETTING GAMES URLs-------------------------------
19+
## ----------------------------DEFINING THE SPIDER------------------------------
2420

2521
# We define the arguments, more information in PR #16
2622
def __init__(self, start_page=0, delay=3, items_per_page=100, **kwargs):
2723
self.start_urls = [f'https://www.metacritic.com/browse/games/score/metascore/all/all/filtered?page={start_page}']
2824
# We store delay and items_per_page on the instance, as we will need them later outside of __init__
2925
self.delay = int(delay)
3026
self.items_per_page = int(items_per_page)
27+
self.start_page = int(start_page)
3128
super().__init__(**kwargs)
32-
33-
# Get the last page number
34-
# last_page_num = int(('.last_page a ::text').get())
29+
30+
## ----------------------------GETTING GAMES URLs-------------------------------
3531

3632
def parse(self, response):
33+
## Creating the loading bar!
34+
# We need the last page number to compute the ETA
35+
last_page_num = int(response.css('.last_page a ::text').get())
36+
# We check which page we are on, as we only need to create the loading bar on the first page
37+
current_page = int(response.css('.active_page span ::text').get()) - 1
38+
if current_page == self.start_page:
39+
self.pbar = tqdm(total=last_page_num - self.start_page, desc="Listing games", ascii=True, unit="page")
40+
41+
## The scraping
42+
# System for items_per_page to work
3743
num_of_games_on_page = len(response.css('.product_wrap > .product_title a::attr(href)').getall())
3844
end = num_of_games_on_page if num_of_games_on_page <= self.items_per_page else self.items_per_page
3945

4046
for x in range(0, end):
4147
yield {
42-
#Extracts the link of the game
48+
#Extracts the link of the game and stores it
4349
'f': response.css('.product_wrap > .product_title a::attr(href)')[x].get()
4450
}
4551

@@ -49,7 +55,7 @@ def parse(self, response):
4955
NEXT_PAGE_SELECTOR = '.next a ::attr(href)'
5056
next_page = response.css(NEXT_PAGE_SELECTOR).get()
5157
## Increase the completed value
52-
pbar.update(1)
58+
self.pbar.update(1)
5359

5460
# Travelling to the next page :D
5561
if next_page:

0 commit comments

Comments
 (0)