|
@@ -0,0 +1,261 @@
|
|
|
+#!/usr/bin/python3
|
|
|
+# coding: utf-8
|
|
|
+
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+from bs4.element import Tag
|
|
|
+import MySQLdb
|
|
|
+import re
|
|
|
+import sys
|
|
|
+import time
|
|
|
+import transliterate
|
|
|
+import openai
|
|
|
+
|
|
|
# Tag names that _extract_blocks treats as translatable units; any other
# element is descended into instead of being translated as a whole.
blocks = [
    "p",
    "h1", "h2", "h3", "h4", "h5",
    "img",
    "li",
]
|
|
|
+
|
|
|
def generate_response(prompt):
    """Request a Russian-to-Bulgarian translation of *prompt* from OpenAI.

    The text is wrapped in a fixed Russian-language instruction that asks for
    a translation only, with any HTML markup kept in the matching places and
    no commentary when the translation equals the original.

    Args:
        prompt: The text (optionally containing HTML) to translate.

    Returns:
        The message content of the first returned choice, or None when the
        response carries no choices.
    """
    openai.api_key = "OPENAI_API_KEY"

    instruction = "Интересует только перевод с русского языка на болгарский язык. Если в оригинале встречается html разметка, то вставлять ее в соответствующих местах перевода. Если перевод совпадает с оригиналом, то вывести только перевод, пояснения о совпадении оригинала и перевода не требуются. Переведи: \""
    # The source text is embedded between double quotes after the instruction.
    request_text = instruction + prompt + "\""

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # The whole request is sent as a single "system" message.
        messages=[{"role": "system", "content": request_text}],
        temperature=0.3,
        n=1,
        stop=None,
    )

    # Guard clause: nothing usable came back.
    if 'choices' not in completion or len(completion.choices) == 0:
        return None
    return completion.choices[0].message.content
|
|
|
+
|
|
|
def remove_tag(string):
    """Strip the outermost markup from a serialized element.

    Removes one tag sitting at the very start of *string* and one tag sitting
    at the very end; tags anywhere else in the text are left untouched.

    Args:
        string: Serialized HTML such as ``"<p>text</p>"``.

    Returns:
        The text with its leading and trailing tag removed.
    """
    edge_tag = re.compile(r'^<[^>]+>|<[^>]+>$')
    return edge_tag.sub('', string)
|
|
|
+
|
|
|
def insert_into_table_postmeta(post_id, meta_key, meta_value):
    """Insert one row into ``wp_postmeta`` over a dedicated connection.

    Args:
        post_id: Integer ID of the post the metadata belongs to.
        meta_key: Metadata key, interpolated into the statement as-is.
        meta_value: Metadata value, interpolated into the statement as-is.

    Raises:
        MySQLdb.Error: If connecting, executing or committing fails. The
            connection is closed in that case as well.
    """
    db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
    try:
        cur = db.cursor()
        # NOTE(review): the values are formatted straight into the SQL text.
        # The script's main section passes meta_value already run through
        # MySQLdb.escape_string, so switching to bound parameters here would
        # double-escape it; change both sides together. meta_key is NOT
        # escaped anywhere, so a key containing a quote breaks the statement.
        sql = """
            INSERT INTO wp_postmeta(meta_id, post_id, meta_key, meta_value)
            VALUES (NULL,'%i','%s','%s')
            """ % (post_id, meta_key, meta_value)
        cur.execute(sql)
        db.commit()
    finally:
        # Release the connection even when execute()/commit() raises.
        db.close()
|
|
|
+
|
|
|
def insert_into_table_postsTranslated(ID, post_modified):
    """Record a source post as translated in ``wp_posts_translated``.

    Args:
        ID: ID of the source post that has been translated.
        post_modified: The source post's ``post_modified`` value, stored
            alongside the ID.

    Raises:
        MySQLdb.Error: If connecting, executing or committing fails. The
            connection is closed in that case as well.
    """
    db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
    try:
        cur = db.cursor()
        # Bound parameters let the driver quote and escape both values.
        cur.execute(
            """
            INSERT INTO wp_posts_translated(ID, post_modified)
            VALUES (%s,%s)
            """,
            (ID, post_modified),
        )
        db.commit()
    finally:
        # Release the connection even when execute()/commit() raises.
        db.close()
|
|
|
+
|
|
|
def _translate_or_exit(text):
    """Translate *text* with generate_response, ending the script on failure.

    Sleeps 25 seconds after every successful call (presumably to stay under
    an API rate limit - confirm) and calls ``sys.exit()`` when no translation
    comes back.
    """
    translated = generate_response(text)
    if translated is None:
        sys.exit()
    time.sleep(25)
    return translated


def _extract_blocks(parent_tag) -> list:
    """Translate the block-level elements below *parent_tag* in place.

    Children whose tag name is listed in ``blocks`` are translated as a unit:
    an ``img`` gets its ``alt`` text replaced, any other block gets its inner
    HTML replaced by the parsed translation. Other elements are searched
    recursively.

    Args:
        parent_tag: A BeautifulSoup element whose children are walked.

    Returns:
        The list of block elements that were visited, in document order
        (including blocks that had nothing to translate).

    Raises:
        SystemExit: When a translation request yields no result.
    """
    extracted_blocks = []
    for tag in parent_tag:
        # Text nodes have no tag name, so they never match ``blocks``.
        if tag.name in blocks:
            if tag.name == "img":
                # An image has no inner text; only a non-empty alt attribute
                # is translatable. Images without one are left untouched
                # instead of falling through to the text branch, which would
                # send an empty string and append the reply inside the <img>.
                alt_text = tag.attrs.get("alt")
                if alt_text:
                    tag["alt"] = _translate_or_exit(alt_text)
            else:
                inner_html = remove_tag(str(tag))
                # Skip empty / whitespace-only blocks: there is nothing to
                # translate and the model's reply would pollute the markup.
                if inner_html.strip():
                    translated = _translate_or_exit(inner_html)
                    tag.clear()
                    tag.append(BeautifulSoup(translated, 'html.parser'))
            extracted_blocks.append(tag)
            continue
        # Not a block itself: look for blocks among its descendants.
        if isinstance(tag, Tag) and len(tag.contents) > 0:
            extracted_blocks.extend(_extract_blocks(tag))
    return extracted_blocks
|
|
|
+
|
|
|
def make_slug(latin_text):
    """Build a URL slug from already-transliterated text.

    Lower-cases the text, drops every character other than ``a-z``, ``0-9``,
    hyphen and space, joins the remaining words with single hyphens and trims
    hyphens from both ends.
    """
    # The hyphen is escaped on purpose: written as ",-," inside a character
    # class it forms a range, which keeps commas and strips hyphens.
    cleaned = re.sub(r'[^a-z0-9\- ]', '', latin_text.lower())
    return re.sub(r'[ -]+', '-', cleaned).strip('-')


def _open_db():
    """Open a new MySQL connection with the script's connection settings."""
    return MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')


def _link_term(db, cur, post_id, term_taxonomy_id, term_order):
    """Attach *post_id* to a term_taxonomy row and bump that row's count."""
    cur.execute(
        """
        INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
        VALUES (%s,%s,%s)
        """,
        (post_id, term_taxonomy_id, term_order),
    )
    db.commit()
    cur.execute(
        """
        UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id=%s
        """,
        (term_taxonomy_id,),
    )
    db.commit()


def main():
    """Translate the oldest untranslated published post and store the copy.

    Steps, each committed separately as in the statements below:
      1. Pick the first published ``post`` whose ID is not yet listed in
         ``wp_posts_translated``.
      2. Translate its content blocks and title; derive the slug.
      3. Insert the translated copy into ``wp_posts`` and set its guid.
      4. Create a ``post_translations`` term linking the new ("bg") and the
         source ("ru") post, plus the matching ``wp_terms`` row.
      5. Mirror category / tag / language relationships onto the new post.
      6. Copy the post meta, translating the Yoast title and focus keyword.
      7. Record the source post in ``wp_posts_translated``.

    NOTE(review): the ``post_translations`` / ``term_translations`` layout
    looks like the Polylang plugin's storage format - confirm.
    """
    pattern_bg = r's:2:"bg";i:(\d+);'

    # --- read phase: short-lived connection, closed before the slow part ---
    db = _open_db()
    try:
        cur = db.cursor()
        cur.execute(
            """
            SELECT * FROM wp_posts WHERE (post_status='publish') AND (post_type='post') AND (ID NOT IN (SELECT ID FROM wp_posts_translated)) ORDER BY ID ASC LIMIT 1;
            """
        )
        row = cur.fetchone()
    finally:
        db.close()
    if row is None:
        return

    source_post_id = row[0]

    # --- translation phase (slow: one API call plus a pause per block) ---
    soup = BeautifulSoup(row[4], features="lxml")
    # Empty post content yields a document without <body>; nothing to walk.
    if soup.body is not None:
        _extract_blocks(soup.body)
    post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))

    post_title = generate_response(row[5])
    if post_title is None:
        sys.exit()
    time.sleep(25)

    post_name = make_slug(transliterate.translit(post_title, 'ru', reversed=True))

    # --- write phase: a fresh connection, as the original script also
    # reconnected after the long translation phase ---
    db = _open_db()
    try:
        cur = db.cursor()

        #####################
        # TABLE posts
        #####################
        # Bound parameters: the driver escapes every copied column, not just
        # the translated content and title. guid starts empty and is filled
        # in once the new ID is known.
        cur.execute(
            """
            INSERT INTO wp_posts(ID, post_author, post_date, post_date_gmt, post_content, post_title, post_excerpt, post_status, comment_status, ping_status, post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt, post_content_filtered, post_parent, guid, menu_order, post_type, post_mime_type, comment_count)
            VALUES (NULL,%s,NOW(),UTC_TIMESTAMP(),%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,NOW(),UTC_TIMESTAMP(),%s,%s,%s,%s,%s,%s,0)
            """,
            (row[1], post_content, post_title, row[6], row[7], row[8], row[9], row[10], post_name, row[12], row[13], row[16], row[17], "", row[19], row[20], row[21]),
        )
        db.commit()
        cur.execute("SELECT LAST_INSERT_ID();")
        tb_posts_id = cur.fetchone()
        if tb_posts_id is None:
            return
        new_post_id = tb_posts_id[0]

        guid = "https://adminkin.com/?p=%i" % new_post_id
        cur.execute(
            """
            UPDATE wp_posts SET guid=%s WHERE ID=%s
            """,
            (guid, new_post_id),
        )
        db.commit()

        ###############################
        # TABLE term_taxonomy
        ###############################
        # PHP-serialized array mapping language code -> post ID.
        translations = 'a:2:{s:2:"bg";i:%i;s:2:"ru";i:%i;}' % (new_post_id, source_post_id)
        cur.execute(
            """
            INSERT INTO wp_term_taxonomy(term_taxonomy_id, term_id, taxonomy, description, parent, count)
            VALUES (NULL,0,'post_translations',%s,0,2)
            """,
            (translations,),
        )
        db.commit()
        cur.execute("SELECT LAST_INSERT_ID();")
        tb_term_taxonomy_id = cur.fetchone()
        if tb_term_taxonomy_id is None:
            return
        group_id = tb_term_taxonomy_id[0]

        # The new row's term_id is set equal to its own term_taxonomy_id and
        # a wp_terms row with that ID is created.
        cur.execute(
            """
            UPDATE wp_term_taxonomy SET term_id=%s WHERE term_taxonomy_id=%s
            """,
            (group_id, group_id),
        )
        db.commit()
        # NOTE(review): name and slug are the same fixed string for every
        # translation group - confirm that this is intended.
        cur.execute(
            """
            INSERT INTO wp_terms(term_id, name, slug, term_group)
            VALUES (%s,'pll_6111111111111','pll_6111111111111',0)
            """,
            (group_id,),
        )
        db.commit()

        ################################
        # TABLE term_relationships
        ################################
        # Fallback primary category used when the source post has no category
        # with a "bg" counterpart (site-specific ID - confirm).
        category_id_bg = 107
        cur.execute(
            """
            SELECT wp_term_relationships.*, wp_term_taxonomy.taxonomy FROM wp_term_relationships LEFT JOIN wp_term_taxonomy
            ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id WHERE wp_term_relationships.object_id=%s
            """,
            (source_post_id,),
        )
        relationships = cur.fetchall()

        # The translation groups do not change while relationships are being
        # mirrored, so they are loaded once instead of once per relationship.
        cur.execute(
            """
            SELECT * FROM wp_term_taxonomy WHERE taxonomy LIKE 'term_translations'
            """
        )
        term_translations = cur.fetchall()

        for relationship in relationships:
            term_order = relationship[2]
            taxonomy = relationship[3]
            if taxonomy in ('category', 'post_tag'):
                pattern_ru = r's:2:"ru";i:%i;' % relationship[1]
                for res in term_translations:
                    # res[3] is the group's serialized description.
                    if not re.search(pattern_ru, res[3]):
                        continue
                    bg_match = re.search(pattern_bg, res[3])
                    if bg_match:
                        id_bg = int(bg_match.group(1))
                        _link_term(db, cur, new_post_id, id_bg, term_order)
                        if taxonomy == 'category':
                            category_id_bg = id_bg
                        # The counterpart was found; stop scanning groups.
                        break
            elif taxonomy == 'language':
                # 105 is the target language's term_taxonomy_id
                # (site-specific ID - confirm).
                _link_term(db, cur, new_post_id, 105, term_order)

        # Both the new post and the source post join the translation group.
        for member_id in (new_post_id, source_post_id):
            cur.execute(
                """
                INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
                VALUES (%s,%s,0)
                """,
                (member_id, group_id),
            )
            db.commit()

        ########################
        # TABLE postmeta
        ########################
        cur.execute(
            """
            SELECT * FROM wp_postmeta WHERE post_id = %s
            """,
            (source_post_id,),
        )
        for tb_postmeta in cur.fetchall():
            meta_key = tb_postmeta[2]
            if meta_key in ('_yoast_wpseo_focuskw', '_yoast_wpseo_title'):
                response = generate_response(tb_postmeta[3])
                time.sleep(25)
                if response is None:
                    # No translation: this meta row is not copied.
                    continue
                meta_value = MySQLdb.escape_string(response).decode('utf-8')
            elif meta_key == '_original_post':
                meta_value = "https://adminkin.com/?p=%i" % new_post_id
            elif meta_key == '_yoast_wpseo_primary_category':
                meta_value = "%i" % category_id_bg
            else:
                meta_value = MySQLdb.escape_string(tb_postmeta[3]).decode('utf-8')

            # insert_into_table_postmeta formats its arguments straight into
            # SQL, hence the escaping of free-text values above.
            insert_into_table_postmeta(new_post_id, meta_key, meta_value)

        ##############################
        # TABLE posts_translated
        ##############################
        insert_into_table_postsTranslated(source_post_id, row[14])
    finally:
        # Runs on normal completion, early return, sys.exit() and errors.
        db.close()


if __name__ == "__main__":
    main()
|