11import consola from "consola" ;
22import { HTMLAnchorElement , HTMLElement , parseHTML } from "linkedom" ;
3+ import { buildScreenshotUrl } from "../utils/urls" ;
34
45export async function crawlExtension (
56 id : string ,
@@ -21,7 +22,7 @@ export async function crawlExtension(
2122 const { document } = parseHTML ( html ) ;
2223
2324 // Uncomment to debug HTML
24- // Bun.write("chrome.html", document.documentElement.outerHTML);
25+ Bun . write ( "chrome.html" , document . documentElement . outerHTML ) ;
2526
2627 // Basic metadata
2728 const name = metaContent ( document , "property=og:title" ) ?. replace (
@@ -106,6 +107,23 @@ export async function crawlExtension(
106107 // const rating = extractNumber(ratingDiv.title); // "Average rating: 4.78 stars"
107108 // const reviewCount = extractNumber(ratingDiv.textContent); // "(1024)"
108109
110+ // <div
111+ // aria-label="Item media 1 screenshot"
112+ // data-media-url="https://lh3.googleusercontent.com/GUgh0ThX2FDPNvbaumYl4DqsUhsbYiCe-Hut9FoVEnkmTrXyA-sHbMk5jmZTj_t-dDP8rAmy6X6a6GNTCn9F8zo4VYU"
113+ // data-is-video="false"
114+ // data-slide-index="0"
115+ // >
116+ const screenshots = [ ...document . querySelectorAll ( "div[data-media-url]" ) ]
117+ . filter ( ( div ) => div . getAttribute ( "data-is-video" ) === "false" )
118+ . map < Gql . Screenshot > ( ( div ) => {
119+ const index = Number ( div . getAttribute ( "data-slide-index" ) || - 1 ) ;
120+ return {
121+ index,
122+ rawUrl : div . getAttribute ( "data-media-url" ) + "=s1280" , // "s1280" gets the full resolution
123+ indexUrl : buildScreenshotUrl ( "chrome-extensions" , id , index ) ,
124+ } ;
125+ } ) ;
126+
109127 if ( name == null ) return ;
110128 if ( storeUrl == null ) return ;
111129 if ( iconUrl == null ) return ;
@@ -114,6 +132,12 @@ export async function crawlExtension(
114132 if ( version == null ) return ;
115133 if ( shortDescription == null ) return ;
116134 if ( longDescription == null ) return ;
135+ if (
136+ screenshots . some (
137+ ( screenshot ) => screenshot . index === - 1 || ! screenshot . rawUrl ,
138+ )
139+ )
140+ return ;
117141
118142 const result : Gql . ChromeExtension = {
119143 id,
@@ -127,6 +151,7 @@ export async function crawlExtension(
127151 longDescription,
128152 rating,
129153 reviewCount,
154+ screenshots,
130155 } ;
131156 consola . debug ( "Crawl results:" , result ) ;
132157 return result ;
0 commit comments