fix: update hltb plugin to work with new website structure
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/tag/release Pipeline was successful

The HowLongToBeat website has changed from API-based search to server-side
rendering with Next.js. This update fixes the plugin to work with the new format:

- Switch from POST API requests to GET requests with query parameters
- Replace JSON API parsing with HTML content parsing
- Add support for parsing Next.js __NEXT_DATA__ embedded JSON
- Add fallback regex-based HTML parsing for game cards
- Extract completion times, game names, IDs, and cover images from HTML
- Support half-hour notation (e.g., "31½ Hours") in time parsing
- Remove obsolete seek token functionality
- Simplify data structures to match new response format

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Felipe M 2025-07-15 18:55:18 +02:00
parent 377b1723c3
commit 5bec3b6a7c
Signed by: fmartingr
GPG key ID: CCFBC5637D4000A8

View file

@ -1,12 +1,13 @@
package fun package fun
import ( import (
"bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"net/url"
"regexp" "regexp"
"strconv"
"strings" "strings"
"time" "time"
@ -20,48 +21,26 @@ type HLTBPlugin struct {
httpClient *http.Client httpClient *http.Client
} }
// HLTBSearchRequest represents the search request payload // HLTBNextData represents the Next.js data structure from the page
type HLTBSearchRequest struct { type HLTBNextData struct {
SearchType string `json:"searchType"` Props struct {
SearchTerms []string `json:"searchTerms"` PageProps struct {
SearchPage int `json:"searchPage"` SearchResults []HLTBGame `json:"searchResults"`
Size int `json:"size"` } `json:"pageProps"`
SearchOptions map[string]interface{} `json:"searchOptions"` } `json:"props"`
UseCache bool `json:"useCache"`
} }
// HLTBGame represents a game from HowLongToBeat // HLTBGame represents a game from HowLongToBeat
type HLTBGame struct { type HLTBGame struct {
ID int `json:"game_id"` ID int `json:"game_id"`
Name string `json:"game_name"` Name string `json:"game_name"`
GameAlias string `json:"game_alias"` GameAlias string `json:"game_alias"`
GameImage string `json:"game_image"` GameImage string `json:"game_image"`
CompMain int `json:"comp_main"` CompMain int `json:"comp_main"`
CompPlus int `json:"comp_plus"` CompPlus int `json:"comp_plus"`
CompComplete int `json:"comp_complete"` CompComplete int `json:"comp_100"`
CompAll int `json:"comp_all"` CompAll int `json:"comp_all"`
InvestedCo int `json:"invested_co"` ReviewScore int `json:"review_score"`
InvestedMp int `json:"invested_mp"`
CountComp int `json:"count_comp"`
CountSpeedruns int `json:"count_speedruns"`
CountBacklog int `json:"count_backlog"`
CountReview int `json:"count_review"`
ReviewScore int `json:"review_score"`
CountPlaying int `json:"count_playing"`
CountRetired int `json:"count_retired"`
}
// HLTBSearchResponse represents the search response
type HLTBSearchResponse struct {
Color string `json:"color"`
Title string `json:"title"`
Category string `json:"category"`
Count int `json:"count"`
Pagecurrent int `json:"pagecurrent"`
Pagesize int `json:"pagesize"`
Pagetotal int `json:"pagetotal"`
SearchTerm string `json:"searchTerm"`
SearchResults []HLTBGame `json:"data"`
} }
// NewHLTB creates a new HLTBPlugin instance // NewHLTB creates a new HLTBPlugin instance
@ -153,90 +132,27 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{}
return []*model.MessageAction{action} return []*model.MessageAction{action}
} }
// searchGame searches for a game on HowLongToBeat // searchGame searches for a game on HowLongToBeat using the new web scraping approach
func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
// Split search terms by words // Create search URL with query parameter
searchTerms := strings.Fields(gameName) searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName))
// Prepare search request
searchRequest := HLTBSearchRequest{
SearchType: "games",
SearchTerms: searchTerms,
SearchPage: 1,
Size: 20,
SearchOptions: map[string]interface{}{
"games": map[string]interface{}{
"userId": 0,
"platform": "",
"sortCategory": "popular",
"rangeCategory": "main",
"rangeTime": map[string]interface{}{
"min": nil,
"max": nil,
},
"gameplay": map[string]interface{}{
"perspective": "",
"flow": "",
"genre": "",
"difficulty": "",
},
"rangeYear": map[string]interface{}{
"min": "",
"max": "",
},
"modifier": "",
},
"users": map[string]interface{}{
"sortCategory": "postcount",
},
"lists": map[string]interface{}{
"sortCategory": "follows",
},
"filter": "",
"sort": 0,
"randomizer": 0,
},
UseCache: true,
}
// Convert to JSON
jsonData, err := json.Marshal(searchRequest)
if err != nil {
return nil, fmt.Errorf("failed to marshal search request: %w", err)
}
// The API endpoint appears to have changed to use dynamic tokens
// Try to get the seek token first, fallback to basic search
seekToken, err := p.getSeekToken()
if err != nil {
// Fallback to old endpoint
seekToken = ""
}
var apiURL string
if seekToken != "" {
apiURL = fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken)
} else {
apiURL = "https://howlongtobeat.com/api/search"
}
// Create HTTP request // Create HTTP request
req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData)) req, err := http.NewRequest("GET", searchURL, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err) return nil, fmt.Errorf("failed to create request: %w", err)
} }
// Set headers to match the working curl request // Set headers to match browser request
req.Header.Set("Accept", "*/*") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Accept-Language", "en-US,en;q=0.9")
req.Header.Set("Cache-Control", "no-cache") req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Origin", "https://howlongtobeat.com")
req.Header.Set("Pragma", "no-cache") req.Header.Set("Pragma", "no-cache")
req.Header.Set("Referer", "https://howlongtobeat.com") req.Header.Set("Sec-Fetch-Dest", "document")
req.Header.Set("Sec-Fetch-Dest", "empty") req.Header.Set("Sec-Fetch-Mode", "navigate")
req.Header.Set("Sec-Fetch-Mode", "cors") req.Header.Set("Sec-Fetch-Site", "none")
req.Header.Set("Sec-Fetch-Site", "same-origin") req.Header.Set("Sec-Fetch-User", "?1")
req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
// Send request // Send request
@ -249,7 +165,7 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
}() }()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode) return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode)
} }
// Read response body // Read response body
@ -258,13 +174,105 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
return nil, fmt.Errorf("failed to read response: %w", err) return nil, fmt.Errorf("failed to read response: %w", err)
} }
// Parse response // Parse games from HTML
var searchResponse HLTBSearchResponse games, err := p.parseGamesFromHTML(string(body))
if err := json.Unmarshal(body, &searchResponse); err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err) return nil, fmt.Errorf("failed to parse games from HTML: %w", err)
} }
return searchResponse.SearchResults, nil return games, nil
}
// Patterns used by parseGamesFromHTML, compiled once at package initialisation
// instead of on every call.
var (
	// hltbNextDataRe captures the JSON payload of the Next.js __NEXT_DATA__
	// script tag (group 1).
	hltbNextDataRe = regexp.MustCompile(`<script id="__NEXT_DATA__" type="application/json">([^<]+)</script>`)

	// hltbGameCardRe captures one search-result card: group 1 is the numeric
	// game ID from the /game/<id> link, group 2 is that link's text. The (?s)
	// flag lets "." match newlines so a card formatted across several lines is
	// still recognised.
	hltbGameCardRe = regexp.MustCompile(`(?s)<li class="back_darkish GameCard_search_list__[^"]*"[^>]*>.*?href="/game/(\d+)"[^>]*>([^<]+)</a>.*?</li>`)
)

// parseGamesFromHTML extracts game information from the HTML response.
//
// It first looks for the embedded __NEXT_DATA__ JSON and, when that decodes and
// carries a searchResults list, returns the list as-is. Otherwise it falls back
// to scanning the markup for game cards and builds one HLTBGame per card from
// the card's link, completion times and cover image.
//
// The returned error is always nil; a page with no recognisable results yields
// a nil slice.
func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) {
	// Preferred source: the structured data Next.js embeds in the page.
	if m := hltbNextDataRe.FindStringSubmatch(html); m != nil {
		var nextData HLTBNextData
		// A decode failure is not fatal: the card fallback below still runs.
		if err := json.Unmarshal([]byte(m[1]), &nextData); err == nil {
			if nextData.Props.PageProps.SearchResults != nil {
				return nextData.Props.PageProps.SearchResults, nil
			}
		}
	}

	// Fallback: scrape the rendered game cards.
	var games []HLTBGame
	for _, card := range hltbGameCardRe.FindAllStringSubmatch(html, -1) {
		gameID, err := strconv.Atoi(card[1])
		if err != nil {
			// The ID group is all digits, so this only happens on overflow.
			continue
		}

		gameName := strings.TrimSpace(card[2])
		if gameName == "" {
			continue
		}

		// card[0] is the full markup of this card; times and image are looked
		// up inside it only, so values from other cards cannot leak in.
		cardHTML := card[0]
		games = append(games, HLTBGame{
			ID:           gameID,
			Name:         gameName,
			GameImage:    p.extractGameImage(cardHTML),
			CompMain:     p.extractTimeFromHTML(cardHTML, "Main Story"),
			CompPlus:     p.extractTimeFromHTML(cardHTML, "Main + Extra"),
			CompComplete: p.extractTimeFromHTML(cardHTML, "Completionist"),
		})
	}

	return games, nil
}
// hltbDurationRe matches one displayed duration such as "12 Hours", "1 Hour",
// "31½ Hours" or "45 Mins". Group 1 is the amount, group 2 the unit.
var hltbDurationRe = regexp.MustCompile(`(\d+½?|½)\s*(Hours?|Mins?)`)

// hltbTimeLabels are the category labels looked up on a game card. They mark
// where one category's value ends and the next category begins.
var hltbTimeLabels = [...]string{"Main Story", "Main + Extra", "Completionist"}

// extractTimeFromHTML returns, in seconds, the first duration that follows the
// category label in html. It returns 0 when the label is absent or when no
// duration appears between the label and the next known category label.
//
// Durations may be whole or half hours ("31½ Hours") or minutes ("45 Mins").
// The search works on the raw string, so newlines between the label and its
// value do not matter.
func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int {
	start := strings.Index(html, category)
	if start < 0 {
		return 0
	}
	segment := html[start+len(category):]

	// Stop at the next category label so that a category without a value
	// (e.g. "--") does not pick up the time that belongs to the following one.
	// NOTE(review): assumes each label is immediately followed by its own
	// value in the card markup — confirm against the live page.
	for _, label := range hltbTimeLabels {
		if cut := strings.Index(segment, label); cut >= 0 {
			segment = segment[:cut]
		}
	}

	m := hltbDurationRe.FindStringSubmatch(segment)
	if m == nil {
		return 0
	}

	// "31½" -> "31.5", "½" -> ".5"; both are valid ParseFloat input.
	amount, err := strconv.ParseFloat(strings.Replace(m[1], "½", ".5", 1), 64)
	if err != nil {
		return 0
	}

	secondsPerUnit := 3600.0
	if strings.HasPrefix(m[2], "Min") {
		secondsPerUnit = 60
	}
	return int(amount * secondsPerUnit)
}
// extractGameImage extracts the game image from HTML content
func (p *HLTBPlugin) extractGameImage(html string) string {
imagePattern := `src="https://howlongtobeat\.com/games/([^"]+)"`
imageRegex := regexp.MustCompile(imagePattern)
match := imageRegex.FindStringSubmatch(html)
if len(match) > 1 {
return match[1]
}
return ""
} }
// formatGameInfo formats game information for display // formatGameInfo formats game information for display
@ -331,49 +339,6 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string {
return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath) return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath)
} }
// hltbSeekTokenRes are the patterns that might contain the seek token, tried
// in order; group 1 of the first one that matches is the token. Compiled once
// at package initialisation instead of inside the lookup loop on every call.
var hltbSeekTokenRes = []*regexp.Regexp{
	regexp.MustCompile(`/api/seek/([a-f0-9]+)`),
	regexp.MustCompile(`"seek/([a-f0-9]+)"`),
	regexp.MustCompile(`seek/([a-f0-9]{12,})`),
}

// hltbFallbackSeekToken is the known working token returned when none can be
// extracted from the page.
const hltbFallbackSeekToken = "d4b2e330db04dbf3"

// getSeekToken attempts to retrieve the seek token from HowLongToBeat.
//
// It fetches the main page with p.httpClient and scans the body for a token.
// An error is returned only when the request cannot be built, sent, or its
// body read; if the page is fetched but holds no recognisable token, the
// fallback token is returned with a nil error. The response status code is
// not inspected, so an error page also ends in the fallback.
func (p *HLTBPlugin) getSeekToken() (string, error) {
	req, err := http.NewRequest(http.MethodGet, "https://howlongtobeat.com", nil)
	if err != nil {
		return "", fmt.Errorf("failed to create token request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("failed to fetch token: %w", err)
	}
	defer func() {
		// Nothing useful can be done with a close error on a read-only body.
		_ = resp.Body.Close()
	}()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read token response: %w", err)
	}

	// Match on the raw bytes; only the small token itself is converted.
	for _, re := range hltbSeekTokenRes {
		if m := re.FindSubmatch(body); m != nil {
			return string(m[1]), nil
		}
	}

	return hltbFallbackSeekToken, nil
}
// createErrorResponse creates an error response message // createErrorResponse creates an error response message
func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction { func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction {
response := &model.Message{ response := &model.Message{