Skip to content

Commit b4bc555

Browse files
feat(lookup): add crawler info on lookup (#2)
* feat(lookup): add crawler info on lookup * fix(lookup): crawlers should have device info other
1 parent a0a0fa2 commit b4bc555

File tree

3 files changed

+86
-8
lines changed

3 files changed

+86
-8
lines changed

types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,16 @@ type Udger struct {
1818
Browsers map[int]Browser
1919
OS map[int]OS
2020
Devices map[int]Device
21+
crawlerTypes map[int]string
22+
Crawlers map[string]Crawler
2123
}
2224

2325
// Info is the result returned by Lookup: it bundles everything that was
// resolved about the user agent (browser, OS, device and, when matched, crawler).
2426
type Info struct {
2527
Browser Browser `json:"browser"`
2628
OS OS `json:"os"`
2729
Device Device `json:"device"`
30+
Crawler Crawler `json:"crawler"` // set by Lookup only when the UA string is a key in Udger.Crawlers
2831
}
2932

3033
// Browser contains information about the browser type, engine and of course its name
@@ -58,3 +61,12 @@ type Device struct {
5861
Name string `json:"name"`
5962
Icon string `json:"icon"`
6063
}
64+
65+
// Crawler describes a known crawler (bot) user agent. Name, Family, Vendor and
// ClassId are read from the udger_crawler_list table; Class is resolved by Lookup.
66+
type Crawler struct {
67+
Name string `json:"name"` // "name" column of udger_crawler_list
68+
Family string `json:"family"` // "family" column of udger_crawler_list
69+
Vendor string `json:"vendor"` // "vendor" column of udger_crawler_list
70+
ClassId int // "class_id" column; key into Udger.crawlerTypes. NOTE(review): unlike its siblings this field has no json tag
71+
Class string `json:"classification"` // classification label; not loaded with the list, Lookup fills it from crawlerTypes[ClassId]
72+
}

udger.go

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"strings"
99
)
1010

11+
// CRAWLER_CLASS_ID is the browser type id that Lookup assigns to user agents
// found in Udger.Crawlers. It is also the key used to fetch the matching label
// from browserTypes, and agents carrying it are reported with the "Other" device.
// NOTE(review): Go convention would spell this in MixedCaps; left unchanged
// because other code refers to it by this name.
const CRAWLER_CLASS_ID = 99
12+
1113
// New creates a new instance of Udger from the dbPath database loaded in memory for fast lookup.
1214
func New(dbPath string) (*Udger, error) {
1315
u := &Udger{
@@ -16,6 +18,8 @@ func New(dbPath string) (*Udger, error) {
1618
Devices: make(map[int]Device),
1719
browserTypes: make(map[int]string),
1820
browserOS: make(map[int]int),
21+
crawlerTypes: make(map[int]string),
22+
Crawlers: make(map[string]Crawler),
1923
}
2024
var err error
2125

@@ -53,6 +57,13 @@ func (udger *Udger) Lookup(ua string) (*Info, error) {
5357
info.Browser.typ = -1
5458
}
5559

60+
if crawler, found := udger.Crawlers[ua]; found {
61+
info.Crawler = crawler
62+
info.Crawler.Class = udger.crawlerTypes[crawler.ClassId]
63+
info.Browser.typ = CRAWLER_CLASS_ID
64+
info.Browser.Type = udger.browserTypes[CRAWLER_CLASS_ID]
65+
}
66+
5667
if val, ok := udger.browserOS[browserID]; ok {
5768
info.OS = udger.OS[val]
5869
} else {
@@ -71,7 +82,7 @@ func (udger *Udger) Lookup(ua string) (*Info, error) {
7182
Name: "Smartphone",
7283
Icon: "phone.png",
7384
}
74-
} else if info.Browser.typ == 5 || info.Browser.typ == 10 || info.Browser.typ == 20 || info.Browser.typ == 50 {
85+
} else if info.Browser.typ == 5 || info.Browser.typ == 10 || info.Browser.typ == 20 || info.Browser.typ == 50 || info.Browser.typ == CRAWLER_CLASS_ID {
7586
info.Device = Device{
7687
Name: "Other",
7788
Icon: "other.png",
@@ -88,13 +99,8 @@ func (udger *Udger) Lookup(ua string) (*Info, error) {
8899

89100
// cleanRegex returns r without a trailing "/si" suffix and without a leading
// "/" delimiter. Input that has neither is returned unchanged.
func (udger *Udger) cleanRegex(r string) string {
90101
// strip the trailing "/si" (single-line, case-insensitive) modifiers, then the leading "/" delimiter
91-
if strings.HasSuffix(r, "/si") {
92-
r = r[:len(r)-3]
93-
}
94-
if strings.HasPrefix(r, "/") {
95-
r = r[1:]
96-
}
97-
102+
r = strings.TrimSuffix(r, "/si")
103+
r = strings.TrimPrefix(r, "/")
98104
return r
99105
}
100106

@@ -128,6 +134,9 @@ func (udger *Udger) init() error {
128134
if err := udger.initOS(); err != nil {
129135
return err
130136
}
137+
if err := udger.initCrawlers(); err != nil {
138+
return err
139+
}
131140
return nil
132141
}
133142

@@ -253,3 +262,31 @@ func (udger *Udger) initOS() error {
253262
rows.Close()
254263
return nil
255264
}
265+
266+
func (udger *Udger) initCrawlers() error {
267+
// Uncategorised, Search engine bot, Site monitor, etc.
268+
rows, err := udger.db.Query("SELECT id, crawler_classification FROM udger_crawler_class")
269+
if err != nil {
270+
return err
271+
}
272+
for rows.Next() {
273+
var crawlerClass string
274+
var id int
275+
rows.Scan(&id, &crawlerClass)
276+
udger.crawlerTypes[id] = crawlerClass
277+
}
278+
rows.Close()
279+
280+
rows, err = udger.db.Query("SELECT ua_string, name, family, vendor, class_id FROM udger_crawler_list")
281+
if err != nil {
282+
return err
283+
}
284+
for rows.Next() {
285+
var crawler Crawler
286+
var uaString string
287+
rows.Scan(&uaString, &crawler.Name, &crawler.Family, &crawler.Vendor, &crawler.ClassId)
288+
udger.Crawlers[uaString] = crawler
289+
}
290+
rows.Close()
291+
return nil
292+
}

udger_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,35 @@ func TestValidDbName(t *testing.T) {
159159
So(info.Browser.Version, ShouldResemble, "")
160160
})
161161
})
162+
163+
Convey("test Crawler", func() {
164+
info, err := udger.Lookup("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
165+
So(err, ShouldBeNil)
166+
So(info, ShouldNotBeNil)
167+
168+
Convey("test lookup info", func() {
169+
So(info.OS.Company, ShouldResemble, "")
170+
So(info.OS.Family, ShouldResemble, "")
171+
So(info.OS.Icon, ShouldResemble, "")
172+
So(info.OS.Name, ShouldResemble, "")
173+
174+
So(info.Device.Name, ShouldResemble, "Other")
175+
So(info.Device.Icon, ShouldResemble, "other.png")
176+
177+
So(info.Browser.Company, ShouldResemble, "")
178+
So(info.Browser.Engine, ShouldResemble, "")
179+
So(info.Browser.Family, ShouldResemble, "")
180+
So(info.Browser.Icon, ShouldResemble, "")
181+
So(info.Browser.Name, ShouldResemble, "")
182+
So(info.Browser.Type, ShouldResemble, "Crawler")
183+
So(info.Browser.Version, ShouldResemble, "")
184+
185+
So(info.Crawler.Name, ShouldResemble, "Googlebot Desktop")
186+
So(info.Crawler.Family, ShouldResemble, "Googlebot")
187+
So(info.Crawler.Vendor, ShouldResemble, "Google Inc.")
188+
So(info.Crawler.Class, ShouldResemble, "Search engine bot")
189+
})
190+
})
162191
})
163192
})
164193
}

0 commit comments

Comments
 (0)