fix: update hltb plugin to work with new website structure
The HowLongToBeat website has changed from API-based search to server-side rendering with Next.js. This update fixes the plugin to work with the new format:
- Switch from POST API requests to GET requests with query parameters
- Replace JSON API parsing with HTML content parsing
- Add support for parsing Next.js __NEXT_DATA__ embedded JSON
- Add fallback regex-based HTML parsing for game cards
- Extract completion times, game names, IDs, and cover images from HTML
- Support half-hour notation (e.g., "31½ Hours") in time parsing
- Remove obsolete seek token functionality
- Simplify data structures to match new response format

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
377b1723c3
commit
5bec3b6a7c
1 changed files with 127 additions and 162 deletions
|
@ -1,12 +1,13 @@
|
||||||
package fun
|
package fun
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -20,48 +21,26 @@ type HLTBPlugin struct {
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
// HLTBSearchRequest represents the search request payload
|
// HLTBNextData represents the Next.js data structure from the page
|
||||||
type HLTBSearchRequest struct {
|
type HLTBNextData struct {
|
||||||
SearchType string `json:"searchType"`
|
Props struct {
|
||||||
SearchTerms []string `json:"searchTerms"`
|
PageProps struct {
|
||||||
SearchPage int `json:"searchPage"`
|
SearchResults []HLTBGame `json:"searchResults"`
|
||||||
Size int `json:"size"`
|
} `json:"pageProps"`
|
||||||
SearchOptions map[string]interface{} `json:"searchOptions"`
|
} `json:"props"`
|
||||||
UseCache bool `json:"useCache"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// HLTBGame represents a game from HowLongToBeat
|
// HLTBGame represents a game from HowLongToBeat
|
||||||
type HLTBGame struct {
|
type HLTBGame struct {
|
||||||
ID int `json:"game_id"`
|
ID int `json:"game_id"`
|
||||||
Name string `json:"game_name"`
|
Name string `json:"game_name"`
|
||||||
GameAlias string `json:"game_alias"`
|
GameAlias string `json:"game_alias"`
|
||||||
GameImage string `json:"game_image"`
|
GameImage string `json:"game_image"`
|
||||||
CompMain int `json:"comp_main"`
|
CompMain int `json:"comp_main"`
|
||||||
CompPlus int `json:"comp_plus"`
|
CompPlus int `json:"comp_plus"`
|
||||||
CompComplete int `json:"comp_complete"`
|
CompComplete int `json:"comp_100"`
|
||||||
CompAll int `json:"comp_all"`
|
CompAll int `json:"comp_all"`
|
||||||
InvestedCo int `json:"invested_co"`
|
ReviewScore int `json:"review_score"`
|
||||||
InvestedMp int `json:"invested_mp"`
|
|
||||||
CountComp int `json:"count_comp"`
|
|
||||||
CountSpeedruns int `json:"count_speedruns"`
|
|
||||||
CountBacklog int `json:"count_backlog"`
|
|
||||||
CountReview int `json:"count_review"`
|
|
||||||
ReviewScore int `json:"review_score"`
|
|
||||||
CountPlaying int `json:"count_playing"`
|
|
||||||
CountRetired int `json:"count_retired"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// HLTBSearchResponse represents the search response
|
|
||||||
type HLTBSearchResponse struct {
|
|
||||||
Color string `json:"color"`
|
|
||||||
Title string `json:"title"`
|
|
||||||
Category string `json:"category"`
|
|
||||||
Count int `json:"count"`
|
|
||||||
Pagecurrent int `json:"pagecurrent"`
|
|
||||||
Pagesize int `json:"pagesize"`
|
|
||||||
Pagetotal int `json:"pagetotal"`
|
|
||||||
SearchTerm string `json:"searchTerm"`
|
|
||||||
SearchResults []HLTBGame `json:"data"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewHLTB creates a new HLTBPlugin instance
|
// NewHLTB creates a new HLTBPlugin instance
|
||||||
|
@ -153,90 +132,27 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{}
|
||||||
return []*model.MessageAction{action}
|
return []*model.MessageAction{action}
|
||||||
}
|
}
|
||||||
|
|
||||||
// searchGame searches for a game on HowLongToBeat
|
// searchGame searches for a game on HowLongToBeat using the new web scraping approach
|
||||||
func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
||||||
// Split search terms by words
|
// Create search URL with query parameter
|
||||||
searchTerms := strings.Fields(gameName)
|
searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName))
|
||||||
|
|
||||||
// Prepare search request
|
|
||||||
searchRequest := HLTBSearchRequest{
|
|
||||||
SearchType: "games",
|
|
||||||
SearchTerms: searchTerms,
|
|
||||||
SearchPage: 1,
|
|
||||||
Size: 20,
|
|
||||||
SearchOptions: map[string]interface{}{
|
|
||||||
"games": map[string]interface{}{
|
|
||||||
"userId": 0,
|
|
||||||
"platform": "",
|
|
||||||
"sortCategory": "popular",
|
|
||||||
"rangeCategory": "main",
|
|
||||||
"rangeTime": map[string]interface{}{
|
|
||||||
"min": nil,
|
|
||||||
"max": nil,
|
|
||||||
},
|
|
||||||
"gameplay": map[string]interface{}{
|
|
||||||
"perspective": "",
|
|
||||||
"flow": "",
|
|
||||||
"genre": "",
|
|
||||||
"difficulty": "",
|
|
||||||
},
|
|
||||||
"rangeYear": map[string]interface{}{
|
|
||||||
"min": "",
|
|
||||||
"max": "",
|
|
||||||
},
|
|
||||||
"modifier": "",
|
|
||||||
},
|
|
||||||
"users": map[string]interface{}{
|
|
||||||
"sortCategory": "postcount",
|
|
||||||
},
|
|
||||||
"lists": map[string]interface{}{
|
|
||||||
"sortCategory": "follows",
|
|
||||||
},
|
|
||||||
"filter": "",
|
|
||||||
"sort": 0,
|
|
||||||
"randomizer": 0,
|
|
||||||
},
|
|
||||||
UseCache: true,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert to JSON
|
|
||||||
jsonData, err := json.Marshal(searchRequest)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to marshal search request: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// The API endpoint appears to have changed to use dynamic tokens
|
|
||||||
// Try to get the seek token first, fallback to basic search
|
|
||||||
seekToken, err := p.getSeekToken()
|
|
||||||
if err != nil {
|
|
||||||
// Fallback to old endpoint
|
|
||||||
seekToken = ""
|
|
||||||
}
|
|
||||||
|
|
||||||
var apiURL string
|
|
||||||
if seekToken != "" {
|
|
||||||
apiURL = fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken)
|
|
||||||
} else {
|
|
||||||
apiURL = "https://howlongtobeat.com/api/search"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create HTTP request
|
// Create HTTP request
|
||||||
req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
|
req, err := http.NewRequest("GET", searchURL, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set headers to match the working curl request
|
// Set headers to match browser request
|
||||||
req.Header.Set("Accept", "*/*")
|
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
|
||||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||||
req.Header.Set("Cache-Control", "no-cache")
|
req.Header.Set("Cache-Control", "no-cache")
|
||||||
req.Header.Set("Content-Type", "application/json")
|
|
||||||
req.Header.Set("Origin", "https://howlongtobeat.com")
|
|
||||||
req.Header.Set("Pragma", "no-cache")
|
req.Header.Set("Pragma", "no-cache")
|
||||||
req.Header.Set("Referer", "https://howlongtobeat.com")
|
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||||
req.Header.Set("Sec-Fetch-Dest", "empty")
|
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||||
req.Header.Set("Sec-Fetch-Mode", "cors")
|
req.Header.Set("Sec-Fetch-Site", "none")
|
||||||
req.Header.Set("Sec-Fetch-Site", "same-origin")
|
req.Header.Set("Sec-Fetch-User", "?1")
|
||||||
|
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
|
||||||
|
|
||||||
// Send request
|
// Send request
|
||||||
|
@ -249,7 +165,7 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode)
|
return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read response body
|
// Read response body
|
||||||
|
@ -258,13 +174,105 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
|
||||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse response
|
// Parse games from HTML
|
||||||
var searchResponse HLTBSearchResponse
|
games, err := p.parseGamesFromHTML(string(body))
|
||||||
if err := json.Unmarshal(body, &searchResponse); err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
return nil, fmt.Errorf("failed to parse games from HTML: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return searchResponse.SearchResults, nil
|
return games, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseGamesFromHTML extracts game information from the HTML response
|
||||||
|
func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) {
|
||||||
|
var games []HLTBGame
|
||||||
|
|
||||||
|
// First, try to parse from the __NEXT_DATA__ JSON
|
||||||
|
jsonDataPattern := `<script id="__NEXT_DATA__" type="application/json">([^<]+)</script>`
|
||||||
|
jsonRegex := regexp.MustCompile(jsonDataPattern)
|
||||||
|
jsonMatches := jsonRegex.FindStringSubmatch(html)
|
||||||
|
|
||||||
|
if len(jsonMatches) > 1 {
|
||||||
|
var nextData HLTBNextData
|
||||||
|
if err := json.Unmarshal([]byte(jsonMatches[1]), &nextData); err == nil {
|
||||||
|
// Try to extract search results from the Next.js data
|
||||||
|
if nextData.Props.PageProps.SearchResults != nil {
|
||||||
|
return nextData.Props.PageProps.SearchResults, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to HTML parsing for game cards
|
||||||
|
// Pattern to match game cards in the HTML
|
||||||
|
gameCardPattern := `<li class="back_darkish GameCard_search_list__[^"]*"[^>]*>.*?href="/game/(\d+)"[^>]*>([^<]+)</a>.*?</li>`
|
||||||
|
gameCardRegex := regexp.MustCompile(gameCardPattern)
|
||||||
|
gameCards := gameCardRegex.FindAllStringSubmatch(html, -1)
|
||||||
|
|
||||||
|
for _, match := range gameCards {
|
||||||
|
if len(match) >= 3 {
|
||||||
|
gameID, err := strconv.Atoi(match[1])
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
gameName := strings.TrimSpace(match[2])
|
||||||
|
if gameName == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract completion times from the game card
|
||||||
|
gameCardHTML := match[0]
|
||||||
|
compMain := p.extractTimeFromHTML(gameCardHTML, "Main Story")
|
||||||
|
compPlus := p.extractTimeFromHTML(gameCardHTML, "Main + Extra")
|
||||||
|
compComplete := p.extractTimeFromHTML(gameCardHTML, "Completionist")
|
||||||
|
|
||||||
|
// Extract game image
|
||||||
|
gameImage := p.extractGameImage(gameCardHTML)
|
||||||
|
|
||||||
|
game := HLTBGame{
|
||||||
|
ID: gameID,
|
||||||
|
Name: gameName,
|
||||||
|
GameImage: gameImage,
|
||||||
|
CompMain: compMain,
|
||||||
|
CompPlus: compPlus,
|
||||||
|
CompComplete: compComplete,
|
||||||
|
}
|
||||||
|
|
||||||
|
games = append(games, game)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return games, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractTimeFromHTML extracts time values from HTML content
|
||||||
|
func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int {
|
||||||
|
// Pattern to match time values after category labels
|
||||||
|
pattern := fmt.Sprintf(`%s.*?(\d+(?:½)?)\s*Hours?`, regexp.QuoteMeta(category))
|
||||||
|
timeRegex := regexp.MustCompile(pattern)
|
||||||
|
match := timeRegex.FindStringSubmatch(html)
|
||||||
|
|
||||||
|
if len(match) > 1 {
|
||||||
|
timeStr := strings.ReplaceAll(match[1], "½", ".5")
|
||||||
|
if timeFloat, err := strconv.ParseFloat(timeStr, 64); err == nil {
|
||||||
|
return int(timeFloat * 3600) // Convert to seconds
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractGameImage extracts the game image from HTML content
|
||||||
|
func (p *HLTBPlugin) extractGameImage(html string) string {
|
||||||
|
imagePattern := `src="https://howlongtobeat\.com/games/([^"]+)"`
|
||||||
|
imageRegex := regexp.MustCompile(imagePattern)
|
||||||
|
match := imageRegex.FindStringSubmatch(html)
|
||||||
|
|
||||||
|
if len(match) > 1 {
|
||||||
|
return match[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// formatGameInfo formats game information for display
|
// formatGameInfo formats game information for display
|
||||||
|
@ -331,49 +339,6 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string {
|
||||||
return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath)
|
return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getSeekToken attempts to retrieve the seek token from HowLongToBeat
|
|
||||||
func (p *HLTBPlugin) getSeekToken() (string, error) {
|
|
||||||
// Try to extract the seek token from the main page
|
|
||||||
req, err := http.NewRequest("GET", "https://howlongtobeat.com", nil)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("failed to create token request: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36")
|
|
||||||
|
|
||||||
resp, err := p.httpClient.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("failed to fetch token: %w", err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
_ = resp.Body.Close()
|
|
||||||
}()
|
|
||||||
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("failed to read token response: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for patterns that might contain the token
|
|
||||||
patterns := []string{
|
|
||||||
`/api/seek/([a-f0-9]+)`,
|
|
||||||
`"seek/([a-f0-9]+)"`,
|
|
||||||
`seek/([a-f0-9]{12,})`,
|
|
||||||
}
|
|
||||||
|
|
||||||
bodyStr := string(body)
|
|
||||||
for _, pattern := range patterns {
|
|
||||||
re := regexp.MustCompile(pattern)
|
|
||||||
matches := re.FindStringSubmatch(bodyStr)
|
|
||||||
if len(matches) > 1 {
|
|
||||||
return matches[1], nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we can't extract a token, return the known working one as fallback
|
|
||||||
return "d4b2e330db04dbf3", nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// createErrorResponse creates an error response message
|
// createErrorResponse creates an error response message
|
||||||
func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction {
|
func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction {
|
||||||
response := &model.Message{
|
response := &model.Message{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue