From 8f79abddbf3a20da2dcd61a9aa1b3f9ea1bc4759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=95=D0=B2=D0=B3=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=A5=D1=80?= =?UTF-8?q?=D0=B0=D0=BC=D0=BE=D0=B2?= Date: Sun, 26 May 2024 19:45:26 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B0=20=D0=BF=D1=83=D0=B1=D0=BB=D0=B8=D0=BA=D0=B0=D1=86?= =?UTF-8?q?=D0=B8=D1=8F=20=D0=B2=20VK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- news-bot.py | 110 +++++++++++++++++++++++++++++++++++----------------- 2 files changed, 76 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index b629cf6..f67d835 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .fleet __pycache__ -keys.py \ No newline at end of file +keys.py +vkdel.py \ No newline at end of file diff --git a/news-bot.py b/news-bot.py index 725fee2..b1dfc32 100755 --- a/news-bot.py +++ b/news-bot.py @@ -26,15 +26,16 @@ heads_site = { } params_get = { - 'access_token': keys.api_key_vk, - 'v': '5.199', # Версия API - 'owner_id': keys.own_id, - 'count': 100, - 'offset': 0 + 'access_token': keys.user_token_vk, + 'v': '5.236', # Версия API + 'owner_id': str(keys.own_id), + 'count': str(100), + 'offset': str(0), + 'filter': 'all' } logger = logging.getLogger() -logger.setLevel(logging.DEBUG) +logger.setLevel(logging.INFO) handler = colorlog.StreamHandler() handler.setFormatter(colorlog.ColoredFormatter( @@ -70,7 +71,7 @@ def html_to_text(html_content): h.reference_links = True # Сохранение оригинальных ссылок markdown_text = h.handle(html_content) - logging.debug(f"Markdown text для дискорд: {markdown_text}") + logging.debug(f"Markdown text: {markdown_text}") # Удаление переносов строк из-за - markdown_text = re.sub(r'-\s*\n\s*', '-', markdown_text, flags=re.DOTALL) @@ -103,7 +104,7 @@ def html_to_text(html_content): # Удаление избыточных пустых строк после удаления строк markdown_text = re.sub(r'\n\s*\n', '\n', markdown_text) - #Изменение ссылок без описания + # Изменение ссылок без описания markdown_text = re.sub(r'\[\]\((https:\/\/[^\)]+)\)', r'[content](\1)', markdown_text) markdown_text = re.sub(r'\[\s]\((https:\/\/[^\)]+)\)', r'[content](\1)', markdown_text) @@ -120,6 +121,12 @@ def remove_empty_lines(text_data): return '\n'.join(non_empty_lines) +def remove_markdown_links(markdown_text): + # Регулярное выражение для поиска Markdown-ссылок и замена их на только URL + markdown_text = re.sub(r'\[.*?\]\((https?://.*?)\)', r'\1' or r'(`https?://.*?)`\)', markdown_text) + return markdown_text + + def remove_duplicate_links(text): seen_links = set() @@ -137,6 +144,12 @@ def remove_duplicate_links(text): return text +def extract_links(text): + # Регулярное выражение для поиска ссылок + url_pattern = re.compile(r'https?://\S+') + return url_pattern.findall(text) + + def script_content(script_ver, resp_changelog): soup = make_soup(resp_changelog) page_text = str(soup) @@ -158,6 +171,8 @@ def script_content(script_ver, resp_changelog): site_text = (f"[center][img]/uploads/default/original/1X/5cfa59077a5275971401fab0114e56f3ffdd0ec4.png[/img][" f"/center]\n{post_text}") + logging.debug(f"Сообщение на сайт {site_text}") + post_data = { "title": f"Обновление скриптов {script_ver}", "raw": site_text, @@ -165,7 +180,6 @@ def script_content(script_ver, resp_changelog): "tags": ["scripts"] } - return post_text, post_data, post_text @@ -180,7 +194,6 @@ def news_content(post_id): if post.get('post_number') == 1: html_content = post.get('cooked', 'Нет содержимого') text_data = html_to_text(html_content) - logging.debug(text_data) return text_data logging.error(f"Первый пост не найден в теме с ID: {post_id}") return None @@ -191,7 +204,10 @@ def news_content(post_id): def response_get(url, heads_site): try: - return requests.get(url, headers=heads_site) + if heads_site == params_get: + return requests.get(url, params=params_get) + elif heads_site == heads_site: + return requests.get(url, headers=heads_site) except requests.RequestException as err: logging.error(f"Ошибка запроса {err}") @@ -211,12 +227,8 @@ def resp_change(): return None, None -def resp_get(url, heads_site): - return response_get(url, heads_site=heads_site) - - def news(): - resp_topics = resp_get(url_news, heads_site) + resp_topics = response_get(url_news, heads_site) if resp_topics.status_code == 200: data = resp_topics.json() @@ -344,15 +356,18 @@ def check_discord_public(): client.run(keys.discord_token) -def vk_post(url, post_text): +def vk_post(url, post_text, links=None): params_post = { 'access_token': keys.api_key_vk, - 'v': '5.199', # Версия API VK - 'owner_id': keys.own_id, - 'message': f'{post_text}', - 'attachments': "photo-99238527_457244491" + 'v': '5.236', # Версия API VK + 'owner_id': str(keys.own_id), + 'message': f'{post_text}' # Дополнительные параметры можно добавить здесь } + + if links: + params_post['attachments'] = links + try: # Отправляем POST-запрос к VK API resp_post = requests.post(url=url, params=params_post) @@ -369,35 +384,49 @@ def vk_post(url, post_text): return None -def vk_topics(): +def get_vk_topics(): wall_posts = [] - while True: - wall_data = response_get(url_vk_get, heads_site=params_get) - if not wall_data: - logging.warning(f"Постов на стене нет") - wall_posts.extend((post['text'] for post in wall_data if 'text' in post)) + wall_data = response_get(url_vk_get, params_get) + wall_data_json = wall_data.json() + if 'error' in wall_data_json: + error_code = wall_data_json['error']['error_code'] + error_msg = wall_data_json['error']['error_msg'] + logging.error(f"Ошибка {error_code}: {error_msg}") + sys.exit(f"Ошибка {error_code}: {error_msg}") - if len(wall_data) < 100: + items = wall_data_json.get('response', {}).get('items', []) + if not items: + logging.warning("Постов на стене нет") break - heads_site['offset'] += 100 + wall_posts.extend((post['text'] for post in items if 'text' in post)) + if len(items) < 100: + break + + params_get['offset'] = str(int(params_get['offset']) + 100) + + pattern = re.compile(r'----------------------------------------------------------\n### (.*?)\t\n', re.DOTALL) + for message in wall_posts: + matches = pattern.findall(message) + if matches: + wall_posts.extend(matches) + logging.debug(f"Найдены посты в ВК: {wall_posts}") return wall_posts def check_vk_posts(): - vk_posts = vk_topics() + vk_posts = get_vk_topics() if not vk_posts: - logging.warning(f"Постов на стене нет") - else: - logging.debug(f"Посты из VK {vk_posts}") + logging.warning(f"Постов на стене нет{vk_posts}") list_titles_and_ids = news() if list_titles_and_ids: list_for_public = [] for topic_id, topic_title in list_titles_and_ids: - if topic_title not in vk_posts(): + # Сравнение заголовков с текстами постов + if not any(topic_title in vk_posts for vk_posts in vk_posts): list_for_public.append((topic_id, topic_title)) if not list_for_public: logging.info(f"Новостей для публикации в ВК нет") @@ -406,8 +435,17 @@ def check_vk_posts(): for topic_id, topic_title in reversed(list_for_public): text_data = news_content(topic_id) if text_data: - content = f"----------------------------------------------------------\n### {topic_title}\t\n" + text_data + "\n" - vk_post(url_vk_post, content) + content = f"----------------------------------------------------------\n{topic_title}\t\n" + text_data + "\n" + content = remove_markdown_links(content) + links = extract_links(content) + if "Обновление скриптов" in topic_title: + # Пример добавления изображения с постом + vk_post(url_vk_post, content, "photo-99238527_457244491") + else: + if links: + vk_post(url_vk_post, content, links) + else: + vk_post(url_vk_post, content) if __name__ == '__main__':