// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package status | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2025-04-08 19:54:18 +03:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2025-03-21 23:58:13 +02:00
										 |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"net/http" | 
					
						
							|  |  |  | 	"net/url" | 
					
						
							|  |  |  | 	"regexp" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/PuerkitoBio/goquery" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/gtserror" | 
					
						
							|  |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | 
					
						
							| 
									
										
										
										
											2025-04-08 19:54:18 +03:00
										 |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/httpclient" | 
					
						
							| 
									
										
										
										
											2025-03-21 23:58:13 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var urlRegex = regexp.MustCompile(`https?://[a-zA-Z0-9./?=_-]+`) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func extractLastURL(text string) string { | 
					
						
							|  |  |  | 	matches := urlRegex.FindAllString(text, -1) | 
					
						
							|  |  |  | 	if len(matches) == 0 { | 
					
						
							|  |  |  | 		return "" | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return matches[len(matches)-1] | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // FetchPreview retrieves OpenGraph metadata from a URL. | 
					
						
							| 
									
										
										
										
											2025-04-08 19:54:18 +03:00
										 |  |  | func FetchPreview(ctx context.Context, httpClient *httpclient.Client, text string) (*gtsmodel.Card, gtserror.WithCode) { | 
					
						
							| 
									
										
										
										
											2025-03-21 23:58:13 +02:00
										 |  |  | 	link := extractLastURL(text) | 
					
						
							|  |  |  | 	if link == "" { | 
					
						
							|  |  |  | 		return nil, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	parsed, err := url.ParseRequestURI(link) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, gtserror.NewErrorInternalError(err, "invalid URL") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if parsed.Scheme != "http" && parsed.Scheme != "https" { | 
					
						
							|  |  |  | 		return nil, gtserror.NewErrorInternalError(fmt.Errorf("unsupported scheme: %s", parsed.Scheme)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-08 19:54:18 +03:00
										 |  |  | 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, link, nil) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, gtserror.NewErrorInternalError(err, "failed to create request") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	resp, err := httpClient.Do(req) | 
					
						
							| 
									
										
										
										
											2025-03-21 23:58:13 +02:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, gtserror.NewErrorInternalError(err, "request failed") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	defer resp.Body.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if resp.StatusCode != http.StatusOK { | 
					
						
							|  |  |  | 		return nil, gtserror.NewErrorInternalError(fmt.Errorf("unexpected status: %s", resp.Status)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	doc, err := goquery.NewDocumentFromReader(resp.Body) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, gtserror.NewErrorInternalError(fmt.Errorf("failed to parse HTML: %w", err)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	card := >smodel.Card{ | 
					
						
							|  |  |  | 		URL: link, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	doc.Find("meta").Each(func(i int, s *goquery.Selection) { | 
					
						
							|  |  |  | 		property, _ := s.Attr("property") | 
					
						
							|  |  |  | 		content, _ := s.Attr("content") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		switch property { | 
					
						
							|  |  |  | 		case "og:title": | 
					
						
							|  |  |  | 			card.Title = content | 
					
						
							|  |  |  | 		case "og:description": | 
					
						
							|  |  |  | 			card.Description = content | 
					
						
							|  |  |  | 		case "og:type": | 
					
						
							|  |  |  | 			card.Type = content | 
					
						
							|  |  |  | 		case "og:image": | 
					
						
							|  |  |  | 			card.Image = content | 
					
						
							|  |  |  | 		case "og:url": | 
					
						
							|  |  |  | 			if content != "" { | 
					
						
							|  |  |  | 				card.URL = content | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		case "og:site_name": | 
					
						
							|  |  |  | 			card.ProviderName = content | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if card.Title == "" { | 
					
						
							|  |  |  | 		card.Title = doc.Find("title").Text() | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if card.Description == "" { | 
					
						
							|  |  |  | 		desc, exists := doc.Find("meta[name='description']").Attr("content") | 
					
						
							|  |  |  | 		if exists { | 
					
						
							|  |  |  | 			card.Description = desc | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return card, nil | 
					
						
							|  |  |  | } |