fix: update hltb plugin to work with new website structure
The HowLongToBeat website has changed from API-based search to server-side rendering with Next.js. This update fixes the plugin to work with the new format:

- Switch from POST API requests to GET requests with query parameters
- Replace JSON API parsing with HTML content parsing
- Add support for parsing Next.js __NEXT_DATA__ embedded JSON
- Add fallback regex-based HTML parsing for game cards
- Extract completion times, game names, IDs, and cover images from HTML
- Support half-hour notation (e.g., "31½ Hours") in time parsing
- Remove obsolete seek token functionality
- Simplify data structures to match new response format

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
377b1723c3
commit
5bec3b6a7c
1 changed files with 127 additions and 162 deletions
|
@ -1,12 +1,13 @@
|
|||
package fun
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -20,48 +21,26 @@ type HLTBPlugin struct {
|
|||
httpClient *http.Client
|
||||
}
|
||||
|
||||
// HLTBSearchRequest represents the search request payload
|
||||
type HLTBSearchRequest struct {
|
||||
SearchType string `json:"searchType"`
|
||||
SearchTerms []string `json:"searchTerms"`
|
||||
SearchPage int `json:"searchPage"`
|
||||
Size int `json:"size"`
|
||||
SearchOptions map[string]interface{} `json:"searchOptions"`
|
||||
UseCache bool `json:"useCache"`
|
||||
// HLTBNextData represents the Next.js data structure from the page
|
||||
type HLTBNextData struct {
|
||||
Props struct {
|
||||
PageProps struct {
|
||||
SearchResults []HLTBGame `json:"searchResults"`
|
||||
} `json:"pageProps"`
|
||||
} `json:"props"`
|
||||
}
|
||||
|
||||
// HLTBGame represents a game from HowLongToBeat
|
||||
type HLTBGame struct {
|
||||
ID int `json:"game_id"`
|
||||
Name string `json:"game_name"`
|
||||
GameAlias string `json:"game_alias"`
|
||||
GameImage string `json:"game_image"`
|
||||
CompMain int `json:"comp_main"`
|
||||
CompPlus int `json:"comp_plus"`
|
||||
CompComplete int `json:"comp_complete"`
|
||||
CompAll int `json:"comp_all"`
|
||||
InvestedCo int `json:"invested_co"`
|
||||
InvestedMp int `json:"invested_mp"`
|
||||
CountComp int `json:"count_comp"`
|
||||
CountSpeedruns int `json:"count_speedruns"`
|
||||
CountBacklog int `json:"count_backlog"`
|
||||
CountReview int `json:"count_review"`
|
||||
ReviewScore int `json:"review_score"`
|
||||
CountPlaying int `json:"count_playing"`
|
||||
CountRetired int `json:"count_retired"`
|
||||
}
|
||||
|
||||
// HLTBSearchResponse represents the search response
|
||||
type HLTBSearchResponse struct {
|
||||
Color string `json:"color"`
|
||||
Title string `json:"title"`
|
||||
Category string `json:"category"`
|
||||
Count int `json:"count"`
|
||||
Pagecurrent int `json:"pagecurrent"`
|
||||
Pagesize int `json:"pagesize"`
|
||||
Pagetotal int `json:"pagetotal"`
|
||||
SearchTerm string `json:"searchTerm"`
|
||||
SearchResults []HLTBGame `json:"data"`
|
||||
ID int `json:"game_id"`
|
||||
Name string `json:"game_name"`
|
||||
GameAlias string `json:"game_alias"`
|
||||
GameImage string `json:"game_image"`
|
||||
CompMain int `json:"comp_main"`
|
||||
CompPlus int `json:"comp_plus"`
|
||||
CompComplete int `json:"comp_100"`
|
||||
CompAll int `json:"comp_all"`
|
||||
ReviewScore int `json:"review_score"`
|
||||
}
|
||||
|
||||
// NewHLTB creates a new HLTBPlugin instance
|
||||
|
@ -153,90 +132,27 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{}
|
|||
return []*model.MessageAction{action}
|
||||
}
|
||||
|
||||
// searchGame searches for a game on HowLongToBeat
|
||||
// searchGame searches for a game on HowLongToBeat using the new web scraping approach
|
||||
func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
||||
// Split search terms by words
|
||||
searchTerms := strings.Fields(gameName)
|
||||
|
||||
// Prepare search request
|
||||
searchRequest := HLTBSearchRequest{
|
||||
SearchType: "games",
|
||||
SearchTerms: searchTerms,
|
||||
SearchPage: 1,
|
||||
Size: 20,
|
||||
SearchOptions: map[string]interface{}{
|
||||
"games": map[string]interface{}{
|
||||
"userId": 0,
|
||||
"platform": "",
|
||||
"sortCategory": "popular",
|
||||
"rangeCategory": "main",
|
||||
"rangeTime": map[string]interface{}{
|
||||
"min": nil,
|
||||
"max": nil,
|
||||
},
|
||||
"gameplay": map[string]interface{}{
|
||||
"perspective": "",
|
||||
"flow": "",
|
||||
"genre": "",
|
||||
"difficulty": "",
|
||||
},
|
||||
"rangeYear": map[string]interface{}{
|
||||
"min": "",
|
||||
"max": "",
|
||||
},
|
||||
"modifier": "",
|
||||
},
|
||||
"users": map[string]interface{}{
|
||||
"sortCategory": "postcount",
|
||||
},
|
||||
"lists": map[string]interface{}{
|
||||
"sortCategory": "follows",
|
||||
},
|
||||
"filter": "",
|
||||
"sort": 0,
|
||||
"randomizer": 0,
|
||||
},
|
||||
UseCache: true,
|
||||
}
|
||||
|
||||
// Convert to JSON
|
||||
jsonData, err := json.Marshal(searchRequest)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal search request: %w", err)
|
||||
}
|
||||
|
||||
// The API endpoint appears to have changed to use dynamic tokens
|
||||
// Try to get the seek token first, fallback to basic search
|
||||
seekToken, err := p.getSeekToken()
|
||||
if err != nil {
|
||||
// Fallback to old endpoint
|
||||
seekToken = ""
|
||||
}
|
||||
|
||||
var apiURL string
|
||||
if seekToken != "" {
|
||||
apiURL = fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken)
|
||||
} else {
|
||||
apiURL = "https://howlongtobeat.com/api/search"
|
||||
}
|
||||
// Create search URL with query parameter
|
||||
searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName))
|
||||
|
||||
// Create HTTP request
|
||||
req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
// Set headers to match the working curl request
|
||||
req.Header.Set("Accept", "*/*")
|
||||
// Set headers to match browser request
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
req.Header.Set("Cache-Control", "no-cache")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Origin", "https://howlongtobeat.com")
|
||||
req.Header.Set("Pragma", "no-cache")
|
||||
req.Header.Set("Referer", "https://howlongtobeat.com")
|
||||
req.Header.Set("Sec-Fetch-Dest", "empty")
|
||||
req.Header.Set("Sec-Fetch-Mode", "cors")
|
||||
req.Header.Set("Sec-Fetch-Site", "same-origin")
|
||||
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||
req.Header.Set("Sec-Fetch-Site", "none")
|
||||
req.Header.Set("Sec-Fetch-User", "?1")
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
|
||||
|
||||
// Send request
|
||||
|
@ -249,7 +165,7 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
|||
}()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode)
|
||||
return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Read response body
|
||||
|
@ -258,13 +174,105 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
|||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
// Parse response
|
||||
var searchResponse HLTBSearchResponse
|
||||
if err := json.Unmarshal(body, &searchResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
// Parse games from HTML
|
||||
games, err := p.parseGamesFromHTML(string(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse games from HTML: %w", err)
|
||||
}
|
||||
|
||||
return searchResponse.SearchResults, nil
|
||||
return games, nil
|
||||
}
|
||||
|
||||
// parseGamesFromHTML extracts game information from the HTML response
|
||||
func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) {
|
||||
var games []HLTBGame
|
||||
|
||||
// First, try to parse from the __NEXT_DATA__ JSON
|
||||
jsonDataPattern := `<script id="__NEXT_DATA__" type="application/json">([^<]+)</script>`
|
||||
jsonRegex := regexp.MustCompile(jsonDataPattern)
|
||||
jsonMatches := jsonRegex.FindStringSubmatch(html)
|
||||
|
||||
if len(jsonMatches) > 1 {
|
||||
var nextData HLTBNextData
|
||||
if err := json.Unmarshal([]byte(jsonMatches[1]), &nextData); err == nil {
|
||||
// Try to extract search results from the Next.js data
|
||||
if nextData.Props.PageProps.SearchResults != nil {
|
||||
return nextData.Props.PageProps.SearchResults, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to HTML parsing for game cards
|
||||
// Pattern to match game cards in the HTML
|
||||
gameCardPattern := `<li class="back_darkish GameCard_search_list__[^"]*"[^>]*>.*?href="/game/(\d+)"[^>]*>([^<]+)</a>.*?</li>`
|
||||
gameCardRegex := regexp.MustCompile(gameCardPattern)
|
||||
gameCards := gameCardRegex.FindAllStringSubmatch(html, -1)
|
||||
|
||||
for _, match := range gameCards {
|
||||
if len(match) >= 3 {
|
||||
gameID, err := strconv.Atoi(match[1])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
gameName := strings.TrimSpace(match[2])
|
||||
if gameName == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract completion times from the game card
|
||||
gameCardHTML := match[0]
|
||||
compMain := p.extractTimeFromHTML(gameCardHTML, "Main Story")
|
||||
compPlus := p.extractTimeFromHTML(gameCardHTML, "Main + Extra")
|
||||
compComplete := p.extractTimeFromHTML(gameCardHTML, "Completionist")
|
||||
|
||||
// Extract game image
|
||||
gameImage := p.extractGameImage(gameCardHTML)
|
||||
|
||||
game := HLTBGame{
|
||||
ID: gameID,
|
||||
Name: gameName,
|
||||
GameImage: gameImage,
|
||||
CompMain: compMain,
|
||||
CompPlus: compPlus,
|
||||
CompComplete: compComplete,
|
||||
}
|
||||
|
||||
games = append(games, game)
|
||||
}
|
||||
}
|
||||
|
||||
return games, nil
|
||||
}
|
||||
|
||||
// extractTimeFromHTML extracts time values from HTML content
|
||||
func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int {
|
||||
// Pattern to match time values after category labels
|
||||
pattern := fmt.Sprintf(`%s.*?(\d+(?:½)?)\s*Hours?`, regexp.QuoteMeta(category))
|
||||
timeRegex := regexp.MustCompile(pattern)
|
||||
match := timeRegex.FindStringSubmatch(html)
|
||||
|
||||
if len(match) > 1 {
|
||||
timeStr := strings.ReplaceAll(match[1], "½", ".5")
|
||||
if timeFloat, err := strconv.ParseFloat(timeStr, 64); err == nil {
|
||||
return int(timeFloat * 3600) // Convert to seconds
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
// extractGameImage extracts the game image from HTML content
|
||||
func (p *HLTBPlugin) extractGameImage(html string) string {
|
||||
imagePattern := `src="https://howlongtobeat\.com/games/([^"]+)"`
|
||||
imageRegex := regexp.MustCompile(imagePattern)
|
||||
match := imageRegex.FindStringSubmatch(html)
|
||||
|
||||
if len(match) > 1 {
|
||||
return match[1]
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// formatGameInfo formats game information for display
|
||||
|
@ -331,49 +339,6 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string {
|
|||
return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath)
|
||||
}
|
||||
|
||||
// getSeekToken attempts to retrieve the seek token from HowLongToBeat
|
||||
func (p *HLTBPlugin) getSeekToken() (string, error) {
|
||||
// Try to extract the seek token from the main page
|
||||
req, err := http.NewRequest("GET", "https://howlongtobeat.com", nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create token request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
|
||||
|
||||
resp, err := p.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to fetch token: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read token response: %w", err)
|
||||
}
|
||||
|
||||
// Look for patterns that might contain the token
|
||||
patterns := []string{
|
||||
`/api/seek/([a-f0-9]+)`,
|
||||
`"seek/([a-f0-9]+)"`,
|
||||
`seek/([a-f0-9]{12,})`,
|
||||
}
|
||||
|
||||
bodyStr := string(body)
|
||||
for _, pattern := range patterns {
|
||||
re := regexp.MustCompile(pattern)
|
||||
matches := re.FindStringSubmatch(bodyStr)
|
||||
if len(matches) > 1 {
|
||||
return matches[1], nil
|
||||
}
|
||||
}
|
||||
|
||||
// If we can't extract a token, return the known working one as fallback
|
||||
return "d4b2e330db04dbf3", nil
|
||||
}
|
||||
|
||||
// createErrorResponse creates an error response message
|
||||
func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction {
|
||||
response := &model.Message{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue