From 5bec3b6a7cfed0ea8a04dfb3d9ce620ca0da1825 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Tue, 15 Jul 2025 18:55:18 +0200 Subject: [PATCH] fix: update hltb plugin to work with new website structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HowLongToBeat website has changed from API-based search to server-side rendering with Next.js. This update fixes the plugin to work with the new format: - Switch from POST API requests to GET requests with query parameters - Replace JSON API parsing with HTML content parsing - Add support for parsing Next.js __NEXT_DATA__ embedded JSON - Add fallback regex-based HTML parsing for game cards - Extract completion times, game names, IDs, and cover images from HTML - Support half-hour notation (e.g., "31½ Hours") in time parsing - Remove obsolete seek token functionality - Simplify data structures to match new response format 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- internal/plugin/fun/hltb.go | 289 ++++++++++++++++-------------------- 1 file changed, 127 insertions(+), 162 deletions(-) diff --git a/internal/plugin/fun/hltb.go b/internal/plugin/fun/hltb.go index f94f2ba..7da29e6 100644 --- a/internal/plugin/fun/hltb.go +++ b/internal/plugin/fun/hltb.go @@ -1,12 +1,13 @@ package fun import ( - "bytes" "encoding/json" "fmt" "io" "net/http" + "net/url" "regexp" + "strconv" "strings" "time" @@ -20,48 +21,26 @@ type HLTBPlugin struct { httpClient *http.Client } -// HLTBSearchRequest represents the search request payload -type HLTBSearchRequest struct { - SearchType string `json:"searchType"` - SearchTerms []string `json:"searchTerms"` - SearchPage int `json:"searchPage"` - Size int `json:"size"` - SearchOptions map[string]interface{} `json:"searchOptions"` - UseCache bool `json:"useCache"` +// HLTBNextData represents the Next.js data structure from the page +type HLTBNextData struct { + Props struct { + PageProps struct { + SearchResults []HLTBGame `json:"searchResults"` + } `json:"pageProps"` + } `json:"props"` } // HLTBGame represents a game from HowLongToBeat type HLTBGame struct { - ID int `json:"game_id"` - Name string `json:"game_name"` - GameAlias string `json:"game_alias"` - GameImage string `json:"game_image"` - CompMain int `json:"comp_main"` - CompPlus int `json:"comp_plus"` - CompComplete int `json:"comp_complete"` - CompAll int `json:"comp_all"` - InvestedCo int `json:"invested_co"` - InvestedMp int `json:"invested_mp"` - CountComp int `json:"count_comp"` - CountSpeedruns int `json:"count_speedruns"` - CountBacklog int `json:"count_backlog"` - CountReview int `json:"count_review"` - ReviewScore int `json:"review_score"` - CountPlaying int `json:"count_playing"` - CountRetired int `json:"count_retired"` -} - -// HLTBSearchResponse represents the search response -type HLTBSearchResponse struct { - Color string `json:"color"` - Title string `json:"title"` - Category string `json:"category"` - Count int `json:"count"` - Pagecurrent int `json:"pagecurrent"` - Pagesize int `json:"pagesize"` - Pagetotal int `json:"pagetotal"` - SearchTerm string `json:"searchTerm"` - SearchResults []HLTBGame `json:"data"` + ID int `json:"game_id"` + Name string `json:"game_name"` + GameAlias string `json:"game_alias"` + GameImage string `json:"game_image"` + CompMain int `json:"comp_main"` + CompPlus int `json:"comp_plus"` + CompComplete int `json:"comp_100"` + CompAll int `json:"comp_all"` + ReviewScore int `json:"review_score"` } // NewHLTB creates a new HLTBPlugin instance @@ -153,90 +132,27 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{} return []*model.MessageAction{action} } -// searchGame searches for a game on HowLongToBeat +// searchGame searches for a game on HowLongToBeat using the new web scraping approach func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { - // Split search terms by words - searchTerms := strings.Fields(gameName) - - // Prepare search request - searchRequest := HLTBSearchRequest{ - SearchType: "games", - SearchTerms: searchTerms, - SearchPage: 1, - Size: 20, - SearchOptions: map[string]interface{}{ - "games": map[string]interface{}{ - "userId": 0, - "platform": "", - "sortCategory": "popular", - "rangeCategory": "main", - "rangeTime": map[string]interface{}{ - "min": nil, - "max": nil, - }, - "gameplay": map[string]interface{}{ - "perspective": "", - "flow": "", - "genre": "", - "difficulty": "", - }, - "rangeYear": map[string]interface{}{ - "min": "", - "max": "", - }, - "modifier": "", - }, - "users": map[string]interface{}{ - "sortCategory": "postcount", - }, - "lists": map[string]interface{}{ - "sortCategory": "follows", - }, - "filter": "", - "sort": 0, - "randomizer": 0, - }, - UseCache: true, - } - - // Convert to JSON - jsonData, err := json.Marshal(searchRequest) - if err != nil { - return nil, fmt.Errorf("failed to marshal search request: %w", err) - } - - // The API endpoint appears to have changed to use dynamic tokens - // Try to get the seek token first, fallback to basic search - seekToken, err := p.getSeekToken() - if err != nil { - // Fallback to old endpoint - seekToken = "" - } - - var apiURL string - if seekToken != "" { - apiURL = fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken) - } else { - apiURL = "https://howlongtobeat.com/api/search" - } + // Create search URL with query parameter + searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName)) // Create HTTP request - req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData)) + req, err := http.NewRequest("GET", searchURL, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - // Set headers to match the working curl request - req.Header.Set("Accept", "*/*") + // Set headers to match browser request + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Cache-Control", "no-cache") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Origin", "https://howlongtobeat.com") req.Header.Set("Pragma", "no-cache") - req.Header.Set("Referer", "https://howlongtobeat.com") - req.Header.Set("Sec-Fetch-Dest", "empty") - req.Header.Set("Sec-Fetch-Mode", "cors") - req.Header.Set("Sec-Fetch-Site", "same-origin") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-User", "?1") + req.Header.Set("Upgrade-Insecure-Requests", "1") req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") // Send request @@ -249,7 +165,7 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode) + return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode) } // Read response body @@ -258,13 +174,105 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { return nil, fmt.Errorf("failed to read response: %w", err) } - // Parse response - var searchResponse HLTBSearchResponse - if err := json.Unmarshal(body, &searchResponse); err != nil { - return nil, fmt.Errorf("failed to parse response: %w", err) + // Parse games from HTML + games, err := p.parseGamesFromHTML(string(body)) + if err != nil { + return nil, fmt.Errorf("failed to parse games from HTML: %w", err) } - return searchResponse.SearchResults, nil + return games, nil +} + +// parseGamesFromHTML extracts game information from the HTML response +func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) { + var games []HLTBGame + + // First, try to parse from the __NEXT_DATA__ JSON + jsonDataPattern := `` + jsonRegex := regexp.MustCompile(jsonDataPattern) + jsonMatches := jsonRegex.FindStringSubmatch(html) + + if len(jsonMatches) > 1 { + var nextData HLTBNextData + if err := json.Unmarshal([]byte(jsonMatches[1]), &nextData); err == nil { + // Try to extract search results from the Next.js data + if nextData.Props.PageProps.SearchResults != nil { + return nextData.Props.PageProps.SearchResults, nil + } + } + } + + // Fallback to HTML parsing for game cards + // Pattern to match game cards in the HTML + gameCardPattern := `
  • ]*>.*?href="/game/(\d+)"[^>]*>([^<]+).*?
  • ` + gameCardRegex := regexp.MustCompile(gameCardPattern) + gameCards := gameCardRegex.FindAllStringSubmatch(html, -1) + + for _, match := range gameCards { + if len(match) >= 3 { + gameID, err := strconv.Atoi(match[1]) + if err != nil { + continue + } + + gameName := strings.TrimSpace(match[2]) + if gameName == "" { + continue + } + + // Extract completion times from the game card + gameCardHTML := match[0] + compMain := p.extractTimeFromHTML(gameCardHTML, "Main Story") + compPlus := p.extractTimeFromHTML(gameCardHTML, "Main + Extra") + compComplete := p.extractTimeFromHTML(gameCardHTML, "Completionist") + + // Extract game image + gameImage := p.extractGameImage(gameCardHTML) + + game := HLTBGame{ + ID: gameID, + Name: gameName, + GameImage: gameImage, + CompMain: compMain, + CompPlus: compPlus, + CompComplete: compComplete, + } + + games = append(games, game) + } + } + + return games, nil +} + +// extractTimeFromHTML extracts time values from HTML content +func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int { + // Pattern to match time values after category labels + pattern := fmt.Sprintf(`%s.*?(\d+(?:½)?)\s*Hours?`, regexp.QuoteMeta(category)) + timeRegex := regexp.MustCompile(pattern) + match := timeRegex.FindStringSubmatch(html) + + if len(match) > 1 { + timeStr := strings.ReplaceAll(match[1], "½", ".5") + if timeFloat, err := strconv.ParseFloat(timeStr, 64); err == nil { + return int(timeFloat * 3600) // Convert to seconds + } + } + + return 0 +} + +// extractGameImage extracts the game image from HTML content +func (p *HLTBPlugin) extractGameImage(html string) string { + imagePattern := `src="https://howlongtobeat\.com/games/([^"]+)"` + imageRegex := regexp.MustCompile(imagePattern) + match := imageRegex.FindStringSubmatch(html) + + if len(match) > 1 { + return match[1] + } + + return "" } // formatGameInfo formats game information for display @@ -331,49 +339,6 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string { return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath) } -// getSeekToken attempts to retrieve the seek token from HowLongToBeat -func (p *HLTBPlugin) getSeekToken() (string, error) { - // Try to extract the seek token from the main page - req, err := http.NewRequest("GET", "https://howlongtobeat.com", nil) - if err != nil { - return "", fmt.Errorf("failed to create token request: %w", err) - } - - req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") - - resp, err := p.httpClient.Do(req) - if err != nil { - return "", fmt.Errorf("failed to fetch token: %w", err) - } - defer func() { - _ = resp.Body.Close() - }() - - body, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("failed to read token response: %w", err) - } - - // Look for patterns that might contain the token - patterns := []string{ - `/api/seek/([a-f0-9]+)`, - `"seek/([a-f0-9]+)"`, - `seek/([a-f0-9]{12,})`, - } - - bodyStr := string(body) - for _, pattern := range patterns { - re := regexp.MustCompile(pattern) - matches := re.FindStringSubmatch(bodyStr) - if len(matches) > 1 { - return matches[1], nil - } - } - - // If we can't extract a token, return the known working one as fallback - return "d4b2e330db04dbf3", nil -} - // createErrorResponse creates an error response message func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction { response := &model.Message{