Files
PortProtonQt/portprotonqt/search_utils.py
Boris Yumankulov 58bbff8e69
All checks were successful
Code check / Check code (push) Successful in 1m45s
chore: clean all vulture 80% confidence dead code
Signed-off-by: Boris Yumankulov <boria138@altlinux.org>
2025-12-21 19:34:32 +05:00

241 lines
8.5 KiB
Python

"""
Utility module for search optimizations including Trie, hash tables, and fuzzy matching.
"""
from typing import Any
from rapidfuzz import fuzz
from threading import Lock
from portprotonqt.logger import get_logger
from PySide6.QtCore import QThread, Signal, QObject
logger = get_logger(__name__)
class TrieNode:
    """One character position inside the prefix tree."""

    def __init__(self):
        # Maps the next character of a key to the node that continues it.
        self.children = dict()
        # True when some inserted key terminates exactly at this node.
        self.is_end_word = False
        # Data attached to the key ending here; stays None otherwise.
        self.payload = None
class Trie:
    """Trie data structure for efficient prefix-based searching.

    Keys are lowercased on insertion and on lookup, so matching is
    case-insensitive. ``insert`` and ``search_prefix`` each hold the
    instance lock for their whole duration.
    """

    def __init__(self):
        self.root = TrieNode()
        self._lock = Lock()  # Held by insert() and search_prefix()

    def insert(self, key: str, payload: Any):
        """Insert ``key`` with ``payload``.

        Inserting a key that is already present overwrites its payload.
        """
        with self._lock:
            node = self.root
            for char in key.lower():
                child = node.children.get(char)
                if child is None:
                    child = node.children[char] = TrieNode()
                node = child
            node.is_end_word = True
            node.payload = payload

    def search_prefix(self, prefix: str) -> list[tuple[str, Any]]:
        """Find all entries whose (lowercased) key starts with ``prefix``.

        Returns:
            A list of ``(key, payload)`` pairs; empty when nothing matches.
        """
        lowered = prefix.lower()
        with self._lock:
            node = self.root
            for char in lowered:
                node = node.children.get(char)
                if node is None:
                    return []
            results: list[tuple[str, Any]] = []
            self._collect_all(node, lowered, results)
            return results

    def _collect_all(self, node: TrieNode, current_prefix: str, results: list[tuple[str, Any]]):
        """Append every ``(key, payload)`` stored at or below ``node`` to ``results``.

        ``current_prefix`` is the key text that leads to ``node``. The walk uses
        an explicit stack rather than recursion so that very long keys cannot
        exhaust the interpreter's recursion limit.
        """
        pending = [(node, current_prefix)]
        while pending:
            current, text = pending.pop()
            if current.is_end_word:
                results.append((text, current.payload))
            # Push children in reverse so they are popped in insertion order,
            # which keeps the depth-first pre-order of a recursive walk.
            for char, child_node in reversed(current.children.items()):
                pending.append((child_node, text + char))
class FuzzySearchIndex:
"""Index for fuzzy string matching with rapidfuzz."""
def __init__(self, items: list[tuple[str, Any]] | None = None):
self.items: list[tuple[str, Any]] = items or []
self.normalized_items: list[tuple[str, Any]] = []
self._lock = Lock()
self._build_normalized_index()
def _build_normalized_index(self):
"""Build a normalized index for fuzzy matching."""
with self._lock:
self.normalized_items = [(self._normalize(item[0]), item[1]) for item in self.items]
def _normalize(self, s: str) -> str:
"""Normalize string for fuzzy matching."""
s = s.lower()
for ch in ["", "®"]:
s = s.replace(ch, "")
for ch in ["-", ":", ","]:
s = s.replace(ch, " ")
s = " ".join(s.split())
for suffix in ["bin", "app"]:
if s.endswith(suffix):
s = s[:-len(suffix)].strip()
keywords_to_remove = {"ultimate", "edition", "definitive", "complete", "remastered"}
words = s.split()
filtered_words = [word for word in words if word not in keywords_to_remove]
return " ".join(filtered_words)
def fuzzy_search(self, query: str, limit: int = 5, min_score: float = 60.0) -> list[tuple[str, Any, float]]:
"""Perform fuzzy search using rapidfuzz."""
with self._lock:
if not query or not self.normalized_items:
return []
query_normalized = self._normalize(query)
results = []
for i, (item_text, item_data) in enumerate(self.normalized_items):
score = fuzz.ratio(query_normalized, item_text)
if score >= min_score:
results.append((self.items[i][0], item_data, score))
# Sort by score descending
results.sort(key=lambda x: x[2], reverse=True)
return results[:limit]
class SearchOptimizer:
"""Main search optimization class combining multiple approaches."""
def __init__(self):
self.hash_index: dict[str, Any] = {}
self.trie_index = Trie()
self.fuzzy_index = None
self._lock = Lock()
def build_indices(self, items: list[tuple[str, Any]]):
"""Build all search indices from items."""
with self._lock:
self.hash_index = {item[0].lower(): item[1] for item in items}
self.trie_index = Trie()
for key, value in self.hash_index.items():
self.trie_index.insert(key, value)
self.fuzzy_index = FuzzySearchIndex(items)
def exact_search(self, key: str) -> Any | None:
"""Perform exact hash-based lookup."""
with self._lock:
return self.hash_index.get(key.lower())
def prefix_search(self, prefix: str) -> list[tuple[str, Any]]:
"""Perform prefix search using Trie."""
with self._lock:
return self.trie_index.search_prefix(prefix)
def fuzzy_search(self, query: str, limit: int = 5, min_score: float = 60.0) -> list[tuple[str, Any, float]]:
"""Perform fuzzy search."""
if self.fuzzy_index:
return self.fuzzy_index.fuzzy_search(query, limit, min_score)
return []
# Threaded search implementation using QThread for performance optimization
class ThreadedSearchWorker(QObject):
    """
    Worker that runs searches against a SearchOptimizer and reports via signals.

    Signals:
        search_started: emitted at the start of every ``execute_search`` call.
        search_finished(list): emitted with the result list when the search succeeds.
        search_error(str): emitted with the exception text when the search raises.
    """
    search_started = Signal()
    search_finished = Signal(list)
    search_error = Signal(str)

    def __init__(self):
        super().__init__()
        self.search_optimizer = SearchOptimizer()
        self.games_data = []

    def set_games_data(self, games_data: list):
        """Store ``games_data`` and rebuild the search indices from it.

        Every entry is indexed under its first element (``game[0]``, the game
        name) with the whole entry as payload.
        """
        self.games_data = games_data
        items = [(game[0], game) for game in games_data]  # game[0] is the name
        self.search_optimizer.build_indices(items)

    def execute_search(self, search_text: str, search_type: str = "auto"):
        """
        Run one search in the calling thread and publish the outcome via signals.

        Args:
            search_text: Text to search for
            search_type: Type of search ("exact", "prefix", "fuzzy", "auto").
                Any other value is handled like "auto".

        The emitted list holds whatever the chosen SearchOptimizer method
        returns: the bare payload for an exact hit, ``(key, payload)`` pairs for
        prefix search, ``(text, payload, score)`` triples for fuzzy search.

        In "auto" mode, texts shorter than 3 characters use prefix search;
        longer texts try an exact match first and fall back to fuzzy search.
        """
        try:
            self.search_started.emit()
            import time
            # perf_counter is monotonic, so the elapsed time cannot go negative.
            start_time = time.perf_counter()
            optimizer = self.search_optimizer
            results = []
            if search_type == "exact":
                exact_result = optimizer.exact_search(search_text)
                if exact_result is not None:
                    results = [exact_result]
            elif search_type == "prefix":
                results = optimizer.prefix_search(search_text)
            elif search_type == "fuzzy":
                results = optimizer.fuzzy_search(search_text, limit=20, min_score=50.0)
            elif len(search_text) < 3:
                # Auto: too short for meaningful fuzzy scores, list by prefix.
                results = optimizer.prefix_search(search_text)
            else:
                # Auto: try exact first, then fuzzy
                exact_result = optimizer.exact_search(search_text)
                if exact_result is not None:
                    results = [exact_result]
                else:
                    results = optimizer.fuzzy_search(search_text, limit=20, min_score=50.0)
            elapsed = time.perf_counter() - start_time
            logger.debug(f"Search completed in {elapsed:.4f} seconds")
            self.search_finished.emit(results)
        except Exception as e:
            # Boundary of the background search: report instead of crashing the thread.
            self.search_error.emit(str(e))
class ThreadedSearch(QThread):
    """
    QThread that owns a ThreadedSearchWorker and runs its search in ``run``.

    ``search_text`` and ``search_type`` are plain attributes read by ``run``.
    The worker's signals are re-emitted through the same-named signals
    declared on this class.
    """
    search_started = Signal()
    search_finished = Signal(list)
    search_error = Signal(str)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.worker = ThreadedSearchWorker()
        self.games_data = []
        self.search_type = "auto"
        self.search_text = ""
        # Relay each worker signal through the matching signal on this object.
        for signal_name in ("search_started", "search_finished", "search_error"):
            getattr(self.worker, signal_name).connect(getattr(self, signal_name))

    def set_games_data(self, games_data: list):
        """Remember ``games_data`` and pass it on to the worker."""
        self.games_data = games_data
        worker = self.worker
        worker.set_games_data(games_data)

    def run(self):
        """Thread body: run the worker's search with the stored text and type."""
        text, mode = self.search_text, self.search_type
        self.worker.execute_search(text, mode)