From c9fddc25f672bc974da60edcfa3c2cbe0e0103b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=95=D0=B2=D0=B3=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=A5=D1=80?= =?UTF-8?q?=D0=B0=D0=BC=D0=BE=D0=B2?= Date: Sun, 26 May 2024 16:56:31 +0300 Subject: [PATCH] =?UTF-8?q?discord=20=D0=BF=D1=80=D0=BE=D0=BC=D0=B5=D0=B6?= =?UTF-8?q?=D1=83=D1=82=D0=BE=D1=87=D0=BD=D1=8B=D0=B9=20=D1=80=D0=B5=D0=B7?= =?UTF-8?q?=D1=83=D0=BB=D1=8C=D1=82=D0=B0=D1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- news-bot.py | 298 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 196 insertions(+), 102 deletions(-) diff --git a/news-bot.py b/news-bot.py index 1a3532c..725fee2 100755 --- a/news-bot.py +++ b/news-bot.py @@ -7,6 +7,7 @@ import discord import logging import colorlog import requests +import html2text from bs4 import BeautifulSoup import keys @@ -14,7 +15,8 @@ import keys url_post = "https://linux-gaming.ru/posts.json" url_news = "https://linux-gaming.ru/c/news/6.json" -url_vk_api = "https://api.vk.com/method/wall.post" +url_vk_post = "https://api.vk.com/method/wall.post" +url_vk_get = "https://api.vk.com/method/wall.get" url_changelog = "https://gitlab.eterfund.ru/Castro-Fidel/PortWINE/raw/master/data_from_portwine/changelog_ru" heads_site = { @@ -23,8 +25,16 @@ heads_site = { "Api-Username": "linux-gaming" } +params_get = { + 'access_token': keys.api_key_vk, + 'v': '5.199', # Версия API + 'owner_id': keys.own_id, + 'count': 100, + 'offset': 0 +} + logger = logging.getLogger() -logger.setLevel(logging.INFO) +logger.setLevel(logging.DEBUG) handler = colorlog.StreamHandler() handler.setFormatter(colorlog.ColoredFormatter( @@ -42,12 +52,10 @@ logger.addHandler(handler) def main(): - try: - last_changelog, resp_changelog = resp_change() - check_version(last_changelog, resp_changelog) - check_discord_public() - except Exception as err: - logging.error(f"Ошибка исполнения функции main: {err}") + last_changelog, resp_changelog = resp_change() + check_version(last_changelog, resp_changelog) + check_discord_public() + check_vk_posts() def make_soup(resp_changelog): @@ -55,18 +63,78 @@ def make_soup(resp_changelog): def html_to_text(html_content): - soup = BeautifulSoup(html_content, 'html.parser') - text = soup.get_text(separator='\n') - links = [] - for a in soup.find_all('a', href=True): - links.append(a['href']) - return text, links + h = html2text.HTML2Text() + h.ignore_links = False # Сохранение ссылок + h.ignore_images = True # Игнорирование изображений + h.bypass_tables = True # Сохранение таблиц + h.reference_links = True # Сохранение оригинальных ссылок + markdown_text = h.handle(html_content) + + logging.debug(f"Markdown text для дискорд: {markdown_text}") + + # Удаление переносов строк из-за - + markdown_text = re.sub(r'-\s*\n\s*', '-', markdown_text, flags=re.DOTALL) + markdown_text = re.sub(r'-\s*\n*', '-', markdown_text, flags=re.DOTALL) + + # Убираем переносы строк внутри круглых скобок () + markdown_text = re.sub(r'\((.*?)\)', lambda x: '(' + x.group(1).replace('\n', ' ') + ')', markdown_text, flags=re.DOTALL) + + # Убираем переносы строк внутри квадратных скобок [] + markdown_text = re.sub(r'\[(.*?)\]', lambda x: '[' + x.group(1).replace('\n', ' ') + ']', markdown_text, flags=re.DOTALL) + + # Удаление переносов строк и пробелов внутри текста + markdown_text = re.sub(r'(?) last_text = f"###Scripts version {script_ver - 1}" last_text = str(last_text) @@ -95,8 +154,7 @@ def script_content(script_ver, resp_changelog): if index_script_ver != -1: changelog_text = changelog_text_last[index_script_ver:] - post_text = (f"-----------------------------\nОбновление скриптов {script_ver}\n" - f"-----------------------------\n") + changelog_text + post_text = (f"-----------------------------\nОбновление скриптов {script_ver}\t\n") + changelog_text site_text = (f"[center][img]/uploads/default/original/1X/5cfa59077a5275971401fab0114e56f3ffdd0ec4.png[/img][" f"/center]\n{post_text}") @@ -107,20 +165,13 @@ def script_content(script_ver, resp_changelog): "tags": ["scripts"] } - params = { - 'access_token': keys.api_key_vk, - 'v': '5.199', # Версия API VK - 'owner_id': keys.own_id, - 'message': f'{post_text}', - 'attachments': "photo-99238527_457244491" - # Дополнительные параметры можно добавить здесь - } - return post_text, post_data, params + + return post_text, post_data, post_text def news_content(post_id): logging.debug(f"Запрос содержимого поста новости с ID: {post_id}") - response = response_get(f"https://linux-gaming.ru/t/{post_id}.json") + response = response_get(f"https://linux-gaming.ru/t/{post_id}.json", heads_site) if response and response.status_code == 200: topic_data = response.json() posts = topic_data.get('post_stream', {}).get('posts', []) @@ -128,10 +179,9 @@ def news_content(post_id): for post in posts: if post.get('post_number') == 1: html_content = post.get('cooked', 'Нет содержимого') - text_data, links = html_to_text(html_content) - text_data = remove_empty_lines(text_data) + text_data = html_to_text(html_content) logging.debug(text_data) - return text_data, links + return text_data logging.error(f"Первый пост не найден в теме с ID: {post_id}") return None else: @@ -139,7 +189,7 @@ def news_content(post_id): return None -def response_get(url): +def response_get(url, heads_site): try: return requests.get(url, headers=heads_site) except requests.RequestException as err: @@ -147,7 +197,7 @@ def response_get(url): def resp_change(): - resp_changelog = response_get(url_changelog) + resp_changelog = response_get(url_changelog, heads_site) if resp_changelog and resp_changelog.status_code == 200: matches_changelog = re.findall(r'###Scripts version (\d+)###', resp_changelog.text) @@ -161,12 +211,12 @@ def resp_change(): return None, None -def resp_get(url): - return response_get(url) +def resp_get(url, heads_site): + return response_get(url, heads_site=heads_site) def news(): - resp_topics = resp_get(url_news) + resp_topics = resp_get(url_news, heads_site) if resp_topics.status_code == 200: data = resp_topics.json() @@ -199,22 +249,36 @@ def site_post(url, headers, json): time.sleep(900) -def vk_post(url, params): - try: - # Отправляем POST-запрос к VK API - resp_post = requests.post(url=url, params=params) - - if resp_post.status_code == 200: - logging.info("Сообщение успешно опубликовано.") - logging.info(resp_post.json()) # Выводим ответ сервера в формате JSON - else: - logging.error(f"Ошибка при публикации сообщения в ВК:, {resp_post.status_code}") - - return resp_post - except requests.RequestException as err: - logging.error(f"VK post failed: {err}") +def check_version(last_changelog, resp_changelog): + list_titles_and_ids = news() + pattern = re.compile(r'Обновление скриптов (\d+)') + + def extract_number(title): + match = pattern.search(title) + if match: + return int(match.group(1)) return None + numbers = [extract_number(title) for _, title in list_titles_and_ids if extract_number(title) is not None] + last_topics_script = max(numbers) + logging.info(f"Последняя новость на сайте о версии: {last_topics_script}") + + if last_topics_script < last_changelog: + list_new_ver = [] + + for script_ver in range(last_topics_script + 1, last_changelog + 1): + list_new_ver.append(script_ver) + logging.info(f"Найдена новая версия скрипта {script_ver}") + changelog_text, post_data, params = script_content(script_ver, resp_changelog) + if post_data: + site_post(url_post, heads_site, post_data) + + if not list_new_ver: + logging.warning(f"Не найдена новая версия скрипта") + sys.exit() + else: + logging.warning("Нет новых версий скриптов PortProton") + async def discord_post(post_text, client): channel = client.get_channel(keys.dicord_channel) @@ -230,7 +294,7 @@ async def get_discord_messages(client, channel_id): messages = [] async for message in channel.history(limit=999999): messages.append(message.content) - pattern = re.compile(r'-----------------------------\n(.*?)\n-----------------------------', re.DOTALL) + pattern = re.compile(r'----------------------------------------------------------\n### (.*?)\t\n', re.DOTALL) for message in messages: matches = pattern.findall(message) if matches: @@ -239,39 +303,6 @@ async def get_discord_messages(client, channel_id): return messages -def check_version(last_changelog, resp_changelog): - list_titles_and_ids = news() - pattern = re.compile(r'Обновление скриптов (\d+)') - - def extract_number(title): - match = pattern.search(title) - if match: - return int(match.group(1)) - return None - - numbers = [extract_number(title) for _, title in list_titles_and_ids if extract_number(title) is not None] - last_topics_script = max(numbers) - logging.info(f"Последняя новость на сайте о версии: {last_topics_script}") - - #last_topics_script = 2297 - if last_topics_script < last_changelog: - list_new_ver = [] - - for script_ver in range(last_topics_script + 1, last_changelog + 1): - list_new_ver.append(script_ver) - logging.info(f"Найдена новая версия скрипта {script_ver}") - changelog_text, post_data, params = script_content(script_ver, resp_changelog) - if post_data: - site_post(url_post, heads_site, post_data) - # vk_post(url_vk_api, params=params) - - if not list_new_ver: - logging.warning(f"Не найдена новая версия скрипта") - sys.exit() - else: - logging.warning("Нет новых версий скриптов PortProton") - - def check_discord_public(): intents = discord.Intents.default() intents.messages = True @@ -303,18 +334,81 @@ def check_discord_public(): return for topic_id, topic_title in reversed(list_for_public): - text_data, links = news_content(topic_id) + text_data = news_content(topic_id) if text_data: - content = f"-----------------------------\n{topic_title}\n-----------------------------\n" + text_data + "\n" + "----------------------------------------------------------" - for link in links: - if link not in content: - content += f"\n{link}" - # Разбиваем содержимое на части по 4000 символов - for i in range(0, len(content), 4000): - await channel.send(content[i:i+4000]) + content = f"----------------------------------------------------------\n### {topic_title}\t\n" + text_data + "\n" + # Разбиваем содержимое на части по 4000 символов + for i in range(0, len(content), 2000): + await channel.send(content[i:i+2000]) await client.close() client.run(keys.discord_token) +def vk_post(url, post_text): + params_post = { + 'access_token': keys.api_key_vk, + 'v': '5.199', # Версия API VK + 'owner_id': keys.own_id, + 'message': f'{post_text}', + 'attachments': "photo-99238527_457244491" + # Дополнительные параметры можно добавить здесь + } + try: + # Отправляем POST-запрос к VK API + resp_post = requests.post(url=url, params=params_post) + + if resp_post.status_code == 200: + logging.info("Сообщение успешно опубликовано.") + logging.info(resp_post.json()) # Выводим ответ сервера в формате JSON + else: + logging.error(f"Ошибка при публикации сообщения в ВК:, {resp_post.status_code}") + + return resp_post + except requests.RequestException as err: + logging.error(f"VK post failed: {err}") + return None + + +def vk_topics(): + wall_posts = [] + + while True: + wall_data = response_get(url_vk_get, heads_site=params_get) + if not wall_data: + logging.warning(f"Постов на стене нет") + wall_posts.extend((post['text'] for post in wall_data if 'text' in post)) + + if len(wall_data) < 100: + break + + heads_site['offset'] += 100 + + return wall_posts + + +def check_vk_posts(): + vk_posts = vk_topics() + if not vk_posts: + logging.warning(f"Постов на стене нет") + else: + logging.debug(f"Посты из VK {vk_posts}") + + list_titles_and_ids = news() + if list_titles_and_ids: + list_for_public = [] + for topic_id, topic_title in list_titles_and_ids: + if topic_title not in vk_posts(): + list_for_public.append((topic_id, topic_title)) + if not list_for_public: + logging.info(f"Новостей для публикации в ВК нет") + else: + logging.info(f"Новости для публикации в ВК: {list_for_public}") + for topic_id, topic_title in reversed(list_for_public): + text_data = news_content(topic_id) + if text_data: + content = f"----------------------------------------------------------\n### {topic_title}\t\n" + text_data + "\n" + vk_post(url_vk_post, content) + + if __name__ == '__main__': main()