diff --git a/internal/plugin/fun/hltb.go b/internal/plugin/fun/hltb.go
index 4c92052..7da29e6 100644
--- a/internal/plugin/fun/hltb.go
+++ b/internal/plugin/fun/hltb.go
@@ -1,12 +1,13 @@
package fun
import (
- "bytes"
"encoding/json"
"fmt"
"io"
"net/http"
+ "net/url"
"regexp"
+ "strconv"
"strings"
"time"
@@ -20,25 +21,26 @@ type HLTBPlugin struct {
httpClient *http.Client
}
+// HLTBNextData represents the Next.js data structure from the page.
+// Only the path props -> pageProps -> searchResults is declared here; that is
+// the only part of the payload this type decodes.
+type HLTBNextData struct {
+ Props struct {
+ PageProps struct {
+ // SearchResults holds the games decoded from the "searchResults" key.
+ SearchResults []HLTBGame `json:"searchResults"`
+ } `json:"pageProps"`
+ } `json:"props"`
+}
+
// HLTBGame represents a game from HowLongToBeat
+//
+// Each json tag names the key the field is decoded from. The HTML fallback in
+// parseGamesFromHTML sets only ID, Name, GameImage, CompMain, CompPlus and
+// CompComplete; GameAlias, CompAll and ReviewScore stay zero on that path.
type HLTBGame struct {
- ID int `json:"game_id"`
- Name string `json:"game_name"`
- GameAlias string `json:"game_alias"`
- GameImage string `json:"game_image"`
- CompMain int `json:"comp_main"`
- CompPlus int `json:"comp_plus"`
- CompComplete int `json:"comp_complete"`
- CompAll int `json:"comp_all"`
- InvestedCo int `json:"invested_co"`
- InvestedMp int `json:"invested_mp"`
- CountComp int `json:"count_comp"`
- CountSpeedruns int `json:"count_speedruns"`
- CountBacklog int `json:"count_backlog"`
- CountReview int `json:"count_review"`
- ReviewScore int `json:"review_score"`
- CountPlaying int `json:"count_playing"`
- CountRetired int `json:"count_retired"`
+ // ID is the numeric game id (the <id> part of /game/<id> links).
+ ID int `json:"game_id"`
+ Name string `json:"game_name"`
+ GameAlias string `json:"game_alias"`
+ // GameImage is the image path relative to https://howlongtobeat.com/games/.
+ GameImage string `json:"game_image"`
+ // Completion times. The HTML fallback stores these in seconds; presumably
+ // the JSON payload uses seconds as well — TODO confirm.
+ CompMain int `json:"comp_main"`
+ CompPlus int `json:"comp_plus"`
+ // CompComplete is decoded from "comp_100" (the completionist time).
+ CompComplete int `json:"comp_100"`
+ CompAll int `json:"comp_all"`
+ ReviewScore int `json:"review_score"`
}
// NewHLTB creates a new HLTBPlugin instance
@@ -130,95 +132,27 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{}
return []*model.MessageAction{action}
}
-// searchGame searches for a game on HowLongToBeat using the API
+// searchGame searches for a game on HowLongToBeat using the new web scraping approach
func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
- // Only the seek token endpoint works now
- return p.searchWithSeekToken(gameName)
-}
-
-// searchWithSeekToken attempts to search using the seek token approach
-func (p *HLTBPlugin) searchWithSeekToken(gameName string) ([]HLTBGame, error) {
- // Get the seek token from the main page
- seekToken, err := p.getSeekToken()
- if err != nil {
- return nil, fmt.Errorf("failed to get seek token: %w", err)
- }
-
- // Split search terms by words
- searchTerms := strings.Fields(gameName)
-
- // Create search URL with seek token
- searchURL := fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken)
-
- // Prepare search request
- searchRequest := map[string]interface{}{
- "searchType": "games",
- "searchTerms": searchTerms,
- "searchPage": 1,
- "size": 20,
- "searchOptions": map[string]interface{}{
- "games": map[string]interface{}{
- "userId": 0,
- "platform": "",
- "sortCategory": "popular",
- "rangeCategory": "main",
- "rangeTime": map[string]interface{}{
- "min": nil,
- "max": nil,
- },
- "gameplay": map[string]interface{}{
- "perspective": "",
- "flow": "",
- "genre": "",
- "difficulty": "",
- },
- "rangeYear": map[string]interface{}{
- "min": "",
- "max": "",
- },
- "modifier": "",
- },
- "users": map[string]interface{}{
- "sortCategory": "postcount",
- },
- "lists": map[string]interface{}{
- "sortCategory": "follows",
- },
- "filter": "",
- "sort": 0,
- "randomizer": 0,
- },
- "useCache": true,
- }
-
- return p.performAPISearch(searchURL, searchRequest)
-}
-
-// performAPISearch performs the actual API search request
-func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string]interface{}) ([]HLTBGame, error) {
- // Convert to JSON
- jsonData, err := json.Marshal(searchRequest)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal search request: %w", err)
- }
+ // Create search URL with query parameter
+ searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName))
// Create HTTP request
- req, err := http.NewRequest("POST", searchURL, bytes.NewBuffer(jsonData))
+ req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
- // Set headers to match the working curl request
- req.Header.Set("Accept", "*/*")
+ // Set headers to match browser request
+ req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
req.Header.Set("Cache-Control", "no-cache")
- req.Header.Set("Content-Type", "application/json")
- req.Header.Set("Origin", "https://howlongtobeat.com")
req.Header.Set("Pragma", "no-cache")
- req.Header.Set("Referer", "https://howlongtobeat.com/")
- req.Header.Set("Sec-Fetch-Dest", "empty")
- req.Header.Set("Sec-Fetch-Mode", "cors")
- req.Header.Set("Sec-Fetch-Site", "same-origin")
+ req.Header.Set("Sec-Fetch-Dest", "document")
+ req.Header.Set("Sec-Fetch-Mode", "navigate")
+ req.Header.Set("Sec-Fetch-Site", "none")
+ req.Header.Set("Sec-Fetch-User", "?1")
+ req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
// Send request
@@ -231,7 +165,7 @@ func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string
}()
if resp.StatusCode != http.StatusOK {
- return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode)
+ return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode)
}
// Read response body
@@ -240,24 +174,105 @@ func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string
return nil, fmt.Errorf("failed to read response: %w", err)
}
- // Parse response
- var searchResponse struct {
- Color string `json:"color"`
- Title string `json:"title"`
- Category string `json:"category"`
- Count int `json:"count"`
- Pagecurrent int `json:"pagecurrent"`
- Pagesize int `json:"pagesize"`
- Pagetotal int `json:"pagetotal"`
- SearchTerm string `json:"searchTerm"`
- SearchResults []HLTBGame `json:"data"`
+ // Parse games from HTML
+ games, err := p.parseGamesFromHTML(string(body))
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse games from HTML: %w", err)
}
- if err := json.Unmarshal(body, &searchResponse); err != nil {
- return nil, fmt.Errorf("failed to parse response: %w", err)
+ return games, nil
+}
+
+// hltbNextDataRe captures the JSON body of the page's __NEXT_DATA__ script
+// element. (?s) lets "." span newlines in case the payload is pretty-printed.
+var hltbNextDataRe = regexp.MustCompile(`(?s)<script[^>]*id="__NEXT_DATA__"[^>]*>(.*?)</script>`)
+
+// hltbGameCardRe captures one search-result card. Group 1 is the numeric id of
+// the first /game/<id> link that has text content; group 2 is that link text.
+// NOTE(review): the <li> container tag is a reconstruction; confirm it against
+// the live markup before relying on the fallback path.
+var hltbGameCardRe = regexp.MustCompile(`(?s)<li[^>]*>.*?href="/game/(\d+)"[^>]*>([^<]+)</a>.*?</li>`)
+
+// parseGamesFromHTML extracts game information from the HTML response.
+//
+// It first tries the structured __NEXT_DATA__ JSON payload and returns its
+// searchResults when that key decodes to a non-nil slice. Otherwise it falls
+// back to scraping result cards from the markup, filling ID, Name, GameImage
+// and the three completion times (in seconds) for each card.
+//
+// The returned error is always nil; a page with no recognisable games yields a
+// nil slice.
+func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) {
+	// Preferred source: the structured payload embedded in the page.
+	if m := hltbNextDataRe.FindStringSubmatch(html); m != nil {
+		var nextData HLTBNextData
+		// A decode failure is deliberately not fatal: the markup fallback
+		// below still gets a chance.
+		if err := json.Unmarshal([]byte(m[1]), &nextData); err == nil &&
+			nextData.Props.PageProps.SearchResults != nil {
+			return nextData.Props.PageProps.SearchResults, nil
+		}
+	}
+
+	// Fallback: scrape the rendered game cards.
+	var games []HLTBGame
+	for _, card := range hltbGameCardRe.FindAllStringSubmatch(html, -1) {
+		gameID, err := strconv.Atoi(card[1])
+		if err != nil {
+			continue // digits that do not fit in an int
+		}
+
+		gameName := strings.TrimSpace(card[2])
+		if gameName == "" {
+			continue
+		}
+
+		// card[0] is the whole card, so the helpers only see this game's markup.
+		cardHTML := card[0]
+		games = append(games, HLTBGame{
+			ID:           gameID,
+			Name:         gameName,
+			GameImage:    p.extractGameImage(cardHTML),
+			CompMain:     p.extractTimeFromHTML(cardHTML, "Main Story"),
+			CompPlus:     p.extractTimeFromHTML(cardHTML, "Main + Extra"),
+			CompComplete: p.extractTimeFromHTML(cardHTML, "Completionist"),
+		})
+	}
+
+	return games, nil
+}
+
+// hltbTimeValueRe matches a duration that directly follows a category label,
+// separated only by whitespace, colons, &nbsp; entities or HTML tags.
+// Group 1 is the amount ("12", "12½" or "½"); group 2 is the unit stem.
+var hltbTimeValueRe = regexp.MustCompile(`^(?:[\s:]|&nbsp;|<[^>]*>)*(\d+½?|½)\s*(Hour|Min)s?`)
+
+// extractTimeFromHTML returns the duration, in seconds, shown for category
+// (for example "Main Story") inside html.
+//
+// The value must follow the label with nothing but markup or whitespace in
+// between, so a category without a time (such as "--") yields 0 instead of
+// picking up the next category's value. Both "Hours" and "Mins" are
+// understood, and a trailing "½" adds half a unit. Every occurrence of the
+// label is tried in order; 0 is returned when none is followed by a time or
+// when category is empty.
+func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int {
+	if category == "" {
+		return 0
+	}
+
+	rest := html
+	for {
+		at := strings.Index(rest, category)
+		if at < 0 {
+			return 0
+		}
+		// Continue scanning after this occurrence of the label.
+		rest = rest[at+len(category):]
+
+		m := hltbTimeValueRe.FindStringSubmatch(rest)
+		if m == nil {
+			continue
+		}
+
+		amount := m[1]
+		half := strings.HasSuffix(amount, "½")
+		amount = strings.TrimSuffix(amount, "½")
+
+		whole := 0
+		if amount != "" {
+			n, err := strconv.Atoi(amount)
+			if err != nil {
+				return 0 // digits that do not fit in an int
+			}
+			whole = n
+		}
+
+		perUnit := 3600 // seconds per hour
+		if m[2] == "Min" {
+			perUnit = 60
+		}
+
+		seconds := whole * perUnit
+		if half {
+			seconds += perUnit / 2
+		}
+		return seconds
+	}
+}
+
+// hltbGameImageRe captures the path that follows the site's /games/ image
+// prefix inside a src attribute. Compiled once instead of on every call.
+var hltbGameImageRe = regexp.MustCompile(`src="https://howlongtobeat\.com/games/([^"]+)"`)
+
+// extractGameImage returns the image path of the first
+// src="https://howlongtobeat.com/games/..." attribute found in html, without
+// the host and /games/ prefix. It returns "" when no such attribute exists.
+func (p *HLTBPlugin) extractGameImage(html string) string {
+	if m := hltbGameImageRe.FindStringSubmatch(html); m != nil {
+		return m[1]
+	}
+
+	return ""
}
// formatGameInfo formats game information for display
@@ -324,202 +339,6 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string {
return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath)
}
-// getSeekToken retrieves the seek token from HowLongToBeat
-func (p *HLTBPlugin) getSeekToken() (string, error) {
- // Get the main page to extract buildId
- req, err := http.NewRequest("GET", "https://howlongtobeat.com", nil)
- if err != nil {
- return "", fmt.Errorf("failed to create token request: %w", err)
- }
-
- req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
-
- resp, err := p.httpClient.Do(req)
- if err != nil {
- return "", fmt.Errorf("failed to fetch token: %w", err)
- }
- defer func() {
- _ = resp.Body.Close()
- }()
-
- body, err := io.ReadAll(resp.Body)
- if err != nil {
- return "", fmt.Errorf("failed to read token response: %w", err)
- }
-
- bodyStr := string(body)
-
- // First, try to find buildId in the __NEXT_DATA__ or page source
- buildIdPatterns := []string{
- `"buildId":"([a-zA-Z0-9_-]+)"`,
- `buildId":"([a-zA-Z0-9_-]+)"`,
- `/_next/static/([a-zA-Z0-9_-]+)/_buildManifest`,
- }
-
- for _, pattern := range buildIdPatterns {
- re := regexp.MustCompile(pattern)
- matches := re.FindStringSubmatch(bodyStr)
- if len(matches) > 1 {
- buildId := matches[1]
- // Now try to get the seek token from the JavaScript files using buildId
- if token, err := p.getSeekTokenFromBuildId(buildId); err == nil {
- return token, nil
- }
- }
- }
-
- // If we can't find buildId, look for direct seek token patterns
- seekPatterns := []string{
- `/api/seek/([a-f0-9]{16})`,
- `"seek/([a-f0-9]{16})"`,
- `api/seek/([a-f0-9]{16})`,
- `seek/([a-f0-9]{12,})`,
- }
-
- for _, pattern := range seekPatterns {
- re := regexp.MustCompile(pattern)
- matches := re.FindStringSubmatch(bodyStr)
- if len(matches) > 1 {
- return matches[1], nil
- }
- }
-
- // Last resort: try multiple known working tokens
- knownTokens := []string{
- "6e17f7a193ef3188", // From your curl example
- "d4b2e330db04dbf3", // Common fallback
- }
-
- for _, token := range knownTokens {
- if p.testSeekToken(token) {
- return token, nil
- }
- }
-
- // Generate a token as last resort
- return p.generateSeekToken(), nil
-}
-
-// getSeekTokenFromBuildId attempts to extract seek token from build-specific files
-func (p *HLTBPlugin) getSeekTokenFromBuildId(buildId string) (string, error) {
- // Common build file patterns where seek tokens might be stored
- fileURLs := []string{
- fmt.Sprintf("https://howlongtobeat.com/_next/static/%s/_buildManifest.js", buildId),
- fmt.Sprintf("https://howlongtobeat.com/_next/static/%s/_ssgManifest.js", buildId),
- fmt.Sprintf("https://howlongtobeat.com/_next/static/chunks/pages/index-%s.js", buildId[:12]),
- }
-
- for _, fileURL := range fileURLs {
- if token, err := p.extractSeekTokenFromFile(fileURL); err == nil && token != "" {
- return token, nil
- }
- }
-
- return "", fmt.Errorf("no seek token found in build files")
-}
-
-// extractSeekTokenFromFile downloads and searches a file for seek token
-func (p *HLTBPlugin) extractSeekTokenFromFile(fileURL string) (string, error) {
- req, err := http.NewRequest("GET", fileURL, nil)
- if err != nil {
- return "", err
- }
-
- req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
-
- resp, err := p.httpClient.Do(req)
- if err != nil {
- return "", err
- }
- defer func() {
- _ = resp.Body.Close()
- }()
-
- if resp.StatusCode != http.StatusOK {
- return "", fmt.Errorf("failed to fetch file: %d", resp.StatusCode)
- }
-
- body, err := io.ReadAll(resp.Body)
- if err != nil {
- return "", err
- }
-
- bodyStr := string(body)
- patterns := []string{
- `seek/([a-f0-9]{16})`,
- `"([a-f0-9]{16})"`,
- `'([a-f0-9]{16})'`,
- }
-
- for _, pattern := range patterns {
- re := regexp.MustCompile(pattern)
- matches := re.FindStringSubmatch(bodyStr)
- if len(matches) > 1 {
- return matches[1], nil
- }
- }
-
- return "", fmt.Errorf("no seek token found in file")
-}
-
-// testSeekToken tests if a seek token works by making a simple API call
-func (p *HLTBPlugin) testSeekToken(token string) bool {
- searchURL := fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", token)
- searchRequest := map[string]interface{}{
- "searchType": "games",
- "searchTerms": []string{"test"},
- "searchPage": 1,
- "size": 1,
- "searchOptions": map[string]interface{}{
- "games": map[string]interface{}{
- "userId": 0,
- "platform": "",
- "sortCategory": "popular",
- "rangeCategory": "main",
- "rangeTime": map[string]interface{}{
- "min": nil,
- "max": nil,
- },
- "gameplay": map[string]interface{}{
- "perspective": "",
- "flow": "",
- "genre": "",
- "difficulty": "",
- },
- "rangeYear": map[string]interface{}{
- "min": "",
- "max": "",
- },
- "modifier": "",
- },
- "users": map[string]interface{}{
- "sortCategory": "postcount",
- },
- "lists": map[string]interface{}{
- "sortCategory": "follows",
- },
- "filter": "",
- "sort": 0,
- "randomizer": 0,
- },
- "useCache": true,
- }
-
- // Test the token with a simple search
- if _, err := p.performAPISearch(searchURL, searchRequest); err == nil {
- return true
- }
- return false
-}
-
-// generateSeekToken generates a seek token based on current time
-func (p *HLTBPlugin) generateSeekToken() string {
- // Use a simple hash-like approach with current timestamp
- // This is a fallback approach since the real token generation is unknown
- now := time.Now().Unix()
- return fmt.Sprintf("%x", now%0xffffffff)[:16]
-}
-
// createErrorResponse creates an error response message
func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction {
response := &model.Message{