butterrobot/internal/plugin/fun/hltb.go
Felipe M. 5bec3b6a7c
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/tag/release Pipeline was successful
fix: update hltb plugin to work with new website structure
The HowLongToBeat website has changed from API-based search to server-side
rendering with Next.js. This update fixes the plugin to work with the new format:

- Switch from POST API requests to GET requests with query parameters
- Replace JSON API parsing with HTML content parsing
- Add support for parsing Next.js __NEXT_DATA__ embedded JSON
- Add fallback regex-based HTML parsing for game cards
- Extract completion times, game names, IDs, and cover images from HTML
- Support half-hour notation (e.g., "31½ Hours") in time parsing
- Remove obsolete seek token functionality
- Simplify data structures to match new response format

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-15 18:55:18 +02:00

359 lines
9.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package fun
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"git.nakama.town/fmartingr/butterrobot/internal/model"
"git.nakama.town/fmartingr/butterrobot/internal/plugin"
)
// HLTBPlugin searches HowLongToBeat for game completion times.
//
// It reacts to `!hltb <game name>` chat commands (see OnMessage), fetches the
// site's search page over HTTP and replies with the first matching game.
type HLTBPlugin struct {
// BasePlugin carries the plugin ID, name and help text set by NewHLTB.
plugin.BasePlugin
// httpClient performs the requests to howlongtobeat.com. NewHLTB creates it
// with a 10-second timeout.
httpClient *http.Client
}
// HLTBNextData represents the Next.js data structure from the page.
//
// It models only the path props.pageProps.searchResults of the JSON embedded
// in the page's __NEXT_DATA__ script tag; parseGamesFromHTML decodes into it
// with json.Unmarshal, which ignores every other key in the payload.
type HLTBNextData struct {
Props struct {
PageProps struct {
// SearchResults stays nil when the payload carries no "searchResults"
// key; parseGamesFromHTML then falls back to scraping the HTML.
SearchResults []HLTBGame `json:"searchResults"`
} `json:"pageProps"`
} `json:"props"`
}
// HLTBGame represents a game from HowLongToBeat.
//
// Values are filled either from the embedded search-result JSON (via the json
// tags) or from the HTML game-card fallback in parseGamesFromHTML, which only
// sets ID, Name, GameImage, CompMain, CompPlus and CompComplete.
type HLTBGame struct {
// ID is the numeric game identifier (the N in a /game/N link).
ID int `json:"game_id"`
// Name is the title shown to the user.
Name string `json:"game_name"`
// GameAlias is decoded from "game_alias"; nothing in this file reads it.
GameAlias string `json:"game_alias"`
// GameImage is the cover image path relative to
// https://howlongtobeat.com/games/ (see getFullImageURL).
GameImage string `json:"game_image"`
// The Comp* fields are completion times that formatTime interprets as
// seconds; formatGameInfo omits any that are not greater than zero.
CompMain int `json:"comp_main"`
CompPlus int `json:"comp_plus"`
CompComplete int `json:"comp_100"`
CompAll int `json:"comp_all"`
// ReviewScore is rendered as "<score>/100" and omitted when not positive.
ReviewScore int `json:"review_score"`
}
// NewHLTB builds an HLTBPlugin registered under the ID "fun.hltb".
//
// The returned plugin owns a dedicated HTTP client whose requests time out
// after 10 seconds.
func NewHLTB() *HLTBPlugin {
	client := &http.Client{Timeout: 10 * time.Second}

	base := plugin.BasePlugin{
		ID:   "fun.hltb",
		Name: "How Long To Beat",
		Help: "Get game completion times from HowLongToBeat.com using `!hltb <game name>`",
	}

	return &HLTBPlugin{BasePlugin: base, httpClient: client}
}
// OnMessage handles incoming messages and answers the `!hltb <game name>`
// command.
//
// Behaviour:
//   - Text that is not an `!hltb` command yields nil (no action).
//   - A bare `!hltb` with no game name yields a usage hint.
//   - Otherwise the lower-cased game name is looked up in cache; on a miss (or
//     an empty cached list) the game is searched via searchGame and the result
//     list is cached for one hour. A cache write failure is only logged.
//   - The first result is formatted with formatGameInfo and sent as a reply
//     with Raw["parse_mode"] = "Markdown" and, when a cover is known,
//     Raw["image_url"] set to its full URL.
//
// Search failures and empty result lists are reported to the chat as error
// messages. The config argument is not used.
func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{}, cache model.CacheInterface) []*model.MessageAction {
	const command = "!hltb"

	text := strings.TrimSpace(msg.Text)
	if !strings.HasPrefix(text, command) {
		return nil
	}

	// The command word must stand alone or be followed by a space, so that
	// unrelated commands sharing the prefix (e.g. "!hltbfoo") are ignored.
	// Matching on "!hltb " directly would never see a bare "!hltb", because
	// the trailing space is removed by TrimSpace above.
	args := text[len(command):]
	if args != "" && args[0] != ' ' {
		return nil
	}

	gameName := strings.TrimSpace(args)
	if gameName == "" {
		return p.createErrorResponse(msg, "Please provide a game name. Usage: !hltb <game name>")
	}

	// Check cache first.
	var games []HLTBGame
	var err error
	cacheKey := strings.ToLower(gameName)
	err = cache.Get(cacheKey, &games)
	if err != nil || len(games) == 0 {
		// Cache miss - search for the game.
		games, err = p.searchGame(gameName)
		if err != nil {
			return p.createErrorResponse(msg, fmt.Sprintf("Error searching for game: %s", err.Error()))
		}
		if len(games) == 0 {
			return p.createErrorResponse(msg, fmt.Sprintf("No results found for '%s'", gameName))
		}

		// Cache the results for 1 hour.
		err = cache.SetWithTTL(cacheKey, games, time.Hour)
		if err != nil {
			// Log cache error but don't fail the request.
			fmt.Printf("Warning: Failed to cache HLTB results: %v\n", err)
		}
	}

	// Use the first result; games is guaranteed non-empty at this point.
	game := games[0]

	responseMsg := &model.Message{
		Text:    p.formatGameInfo(game),
		Chat:    msg.Chat,
		ReplyTo: msg.ID,
		Channel: msg.Channel,
	}

	// Raw carries platform hints: the parse mode for the markdown in the text
	// and, when available, the cover image to attach.
	if responseMsg.Raw == nil {
		responseMsg.Raw = make(map[string]interface{})
	}
	responseMsg.Raw["parse_mode"] = "Markdown"
	if game.GameImage != "" {
		responseMsg.Raw["image_url"] = p.getFullImageURL(game.GameImage)
	}

	action := &model.MessageAction{
		Type:    model.ActionSendMessage,
		Message: responseMsg,
		Chat:    msg.Chat,
		Channel: msg.Channel,
	}
	return []*model.MessageAction{action}
}
// searchGame looks a game up on HowLongToBeat by requesting the site's search
// page and scraping the games out of the returned HTML.
//
// The request is a GET with the query-escaped game name in the "q" parameter
// and a set of browser-like headers. An error is returned when the request
// cannot be built or sent, when the status is not 200, when the body cannot be
// read, or when parseGamesFromHTML fails.
func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) {
	searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName))

	req, err := http.NewRequest(http.MethodGet, searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	// Headers mimicking a browser page navigation, applied in order.
	browserHeaders := [][2]string{
		{"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"},
		{"Accept-Language", "en-US,en;q=0.9"},
		{"Cache-Control", "no-cache"},
		{"Pragma", "no-cache"},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-User", "?1"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36"},
	}
	for _, header := range browserHeaders {
		req.Header.Set(header[0], header[1])
	}

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	// The close error is deliberately ignored: the body has been consumed or
	// abandoned by then and there is nothing useful to do with it.
	defer func() { _ = resp.Body.Close() }()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("HTTP request failed with status code: %d", code)
	}

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	found, err := p.parseGamesFromHTML(string(raw))
	if err != nil {
		return nil, fmt.Errorf("failed to parse games from HTML: %w", err)
	}
	return found, nil
}
var (
	// hltbNextDataRe captures the JSON payload of the Next.js data script tag.
	hltbNextDataRe = regexp.MustCompile(`<script id="__NEXT_DATA__" type="application/json">([^<]+)</script>`)

	// hltbGameCardRe matches one search-result card and captures the game ID
	// and the link text (the game name). The (?s) flag lets "." cross line
	// breaks so cards that span several lines are matched as well.
	hltbGameCardRe = regexp.MustCompile(`(?s)<li class="back_darkish GameCard_search_list__[^"]*"[^>]*>.*?href="/game/(\d+)"[^>]*>([^<]+)</a>.*?</li>`)

	// hltbEntityReplacer decodes the HTML entities commonly produced when text
	// is escaped for markup, in a single pass (so "&amp;lt;" becomes "&lt;",
	// not "<").
	hltbEntityReplacer = strings.NewReplacer(
		"&amp;", "&",
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", `"`,
		"&#x27;", "'",
		"&#39;", "'",
	)
)

// parseGamesFromHTML extracts game information from the HTML response.
//
// Two strategies are tried in order:
//
//  1. Decode the JSON embedded in the __NEXT_DATA__ script tag and return its
//     search results when there are any.
//  2. Otherwise scrape the rendered game cards: the game ID and name come from
//     the card's /game/<id> link, completion times from extractTimeFromHTML
//     and the cover from extractGameImage. Cards with a non-numeric ID or an
//     empty name are skipped.
//
// The returned slice is nil when nothing could be extracted. The error result
// is always nil; it is kept so callers can treat parsing as fallible.
func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) {
	if payload := hltbNextDataRe.FindStringSubmatch(html); payload != nil {
		var nextData HLTBNextData
		// A malformed payload is not fatal: fall through to HTML scraping.
		if err := json.Unmarshal([]byte(payload[1]), &nextData); err == nil {
			if results := nextData.Props.PageProps.SearchResults; len(results) > 0 {
				return results, nil
			}
		}
	}

	var games []HLTBGame
	for _, card := range hltbGameCardRe.FindAllStringSubmatch(html, -1) {
		gameID, err := strconv.Atoi(card[1])
		if err != nil {
			continue
		}

		// The name is taken from markup, so entities such as "&amp;" must be
		// decoded before it is shown to users.
		gameName := strings.TrimSpace(hltbEntityReplacer.Replace(card[2]))
		if gameName == "" {
			continue
		}

		cardHTML := card[0]
		games = append(games, HLTBGame{
			ID:           gameID,
			Name:         gameName,
			GameImage:    p.extractGameImage(cardHTML),
			CompMain:     p.extractTimeFromHTML(cardHTML, "Main Story"),
			CompPlus:     p.extractTimeFromHTML(cardHTML, "Main + Extra"),
			CompComplete: p.extractTimeFromHTML(cardHTML, "Completionist"),
		})
	}
	return games, nil
}
var (
	// hltbLabelValueRe is applied to the markup that directly follows a
	// category label. It skips any run of tags and whitespace and captures the
	// first text node after them.
	hltbLabelValueRe = regexp.MustCompile(`^(?:\s*<[^>]*>)*\s*([^<]+)`)

	// hltbDurationRe matches a duration such as "12 Hours", "31½ Hours",
	// "1 Hour", "½ Hours" or "45 Mins" and captures the amount and the unit.
	hltbDurationRe = regexp.MustCompile(`(\d+½?|½)\s*(Hour|Min)`)
)

// extractTimeFromHTML extracts the completion time shown for category (for
// example "Main Story") from a game card's HTML and returns it in seconds.
//
// Only the first text node following the first occurrence of the label is
// inspected. This keeps a category without a usable value (e.g. "--") from
// picking up the time that belongs to the next category. Both hour and minute
// values are understood, including the half-hour notation "31½ Hours".
//
// It returns 0 when the label is absent or no duration follows it.
func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int {
	at := strings.Index(html, category)
	if at < 0 {
		return 0
	}

	node := hltbLabelValueRe.FindStringSubmatch(html[at+len(category):])
	if node == nil {
		return 0
	}

	parts := hltbDurationRe.FindStringSubmatch(node[1])
	if parts == nil {
		return 0
	}

	// "31½" becomes "31.5" and a lone "½" becomes ".5"; both parse as floats.
	amount, err := strconv.ParseFloat(strings.Replace(parts[1], "½", ".5", 1), 64)
	if err != nil {
		return 0
	}

	secondsPerUnit := 3600.0
	if parts[2] == "Min" {
		secondsPerUnit = 60.0
	}
	return int(amount * secondsPerUnit)
}
// hltbCoverImageRe captures the part of a cover image URL that follows
// "https://howlongtobeat.com/games/" inside a src attribute. It is compiled
// once at package scope instead of on every call.
var hltbCoverImageRe = regexp.MustCompile(`src="https://howlongtobeat\.com/games/([^"]+)"`)

// extractGameImage extracts the game image from HTML content.
//
// It returns the path of the first cover image found, relative to
// https://howlongtobeat.com/games/, or "" when the HTML holds no such image.
func (p *HLTBPlugin) extractGameImage(html string) string {
	if cover := hltbCoverImageRe.FindStringSubmatch(html); cover != nil {
		return cover[1]
	}
	return ""
}
// formatGameInfo formats game information for display.
//
// The result is markdown text: a title line with the game name, one line per
// completion time that is greater than zero (main story, main + extras,
// completionist, all styles), the user score when it is positive, and a
// closing source attribution. Times are rendered with formatTime.
func (p *HLTBPlugin) formatGameInfo(game HLTBGame) string {
	var response strings.Builder
	fmt.Fprintf(&response, "🎮 **%s**\n\n", game.Name)

	// Each row pairs an icon and label with its completion time in seconds.
	// Every row carries an icon so the lines render uniformly.
	rows := []struct {
		label   string
		seconds int
	}{
		{"📖 **Main Story:**", game.CompMain},
		{"➕ **Main + Extras:**", game.CompPlus},
		{"💯 **Completionist:**", game.CompComplete},
		{"🎯 **All Styles:**", game.CompAll},
	}
	for _, row := range rows {
		if row.seconds > 0 {
			fmt.Fprintf(&response, "%s %s\n", row.label, p.formatTime(row.seconds))
		}
	}

	// Add review score if available.
	if game.ReviewScore > 0 {
		fmt.Fprintf(&response, "\n⭐ **User Score:** %d/100", game.ReviewScore)
	}

	// Add source attribution.
	response.WriteString("\n\n*Source: HowLongToBeat.com*")
	return response.String()
}
// formatTime converts seconds to a readable time format.
//
//   - seconds <= 0 yields "N/A".
//   - Less than one hour yields whole minutes (rounded down), e.g.
//     "30 minutes" or "1 minute".
//   - Otherwise hours with one decimal, e.g. "1.0 hour" or "1.5 hours".
//
// The unit is singular only when the displayed amount is exactly one.
func (p *HLTBPlugin) formatTime(seconds int) string {
	if seconds <= 0 {
		return "N/A"
	}

	if seconds < 3600 {
		minutes := seconds / 60
		if minutes == 1 {
			return "1 minute"
		}
		return fmt.Sprintf("%d minutes", minutes)
	}

	// Decide singular/plural from the rounded text that is actually shown, so
	// that "1.5" or "2.0" never end up with the singular unit.
	amount := fmt.Sprintf("%.1f", float64(seconds)/3600.0)
	if amount == "1.0" {
		return amount + " hour"
	}
	return amount + " hours"
}
// getFullImageURL turns a cover image path into an absolute URL under
// https://howlongtobeat.com/games/.
//
// A single leading slash on imagePath is dropped before joining. An empty
// imagePath yields an empty string.
func (p *HLTBPlugin) getFullImageURL(imagePath string) string {
	relative := strings.TrimPrefix(imagePath, "/")
	if imagePath != "" {
		return fmt.Sprintf("https://howlongtobeat.com/games/%s", relative)
	}
	return ""
}
// createErrorResponse wraps errorText in a single send-message action that
// replies to msg in the same chat and channel. The text is prefixed with a
// cross-mark icon.
func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction {
	reply := &model.Message{
		Text:    fmt.Sprintf("❌ %s", errorText),
		Chat:    msg.Chat,
		ReplyTo: msg.ID,
		Channel: msg.Channel,
	}

	return []*model.MessageAction{
		{
			Type:    model.ActionSendMessage,
			Message: reply,
			Chat:    msg.Chat,
			Channel: msg.Channel,
		},
	}
}