translator.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. #!/usr/bin/python3
  2. # coding: utf-8
  3. from bs4 import BeautifulSoup
  4. from bs4.element import Tag
  5. import MySQLdb
  6. import re
  7. import sys
  8. import time
  9. import transliterate
  10. import openai
# Tag names that _extract_blocks treats as translatable units
# (matched against ``tag.name`` while walking the parsed post body).
blocks = ["p", "h1", "h2", "h3", "h4", "h5", "img", "li"]
  12. def generate_response(prompt):
  13. openai.api_key = "OPENAI_API_KEY"
  14. prompt = "Интересует только перевод с русского языка на болгарский язык. Если в оригинале встречается html разметка, то вставлять ее в соответствующих местах перевода. Если перевод совпадает с оригиналом, то вывести только перевод, пояснения о совпадении оригинала и перевода не требуются. Переведи: \"" + prompt + "\""
  15. response = openai.ChatCompletion.create(
  16. model="gpt-3.5-turbo",
  17. messages=[{"role": "system", "content": prompt}],
  18. temperature=0.3,
  19. n=1,
  20. stop=None,
  21. )
  22. if 'choices' in response and len(response.choices) > 0:
  23. return response.choices[0].message.content
  24. else:
  25. return None
  26. def remove_tag(string):
  27. pattern = r'^<[^>]+>|<[^>]+>$'
  28. result = re.sub(pattern, '', string)
  29. return result
  30. def insert_into_table_postmeta(post_id, meta_key, meta_value):
  31. db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
  32. cur = db.cursor()
  33. sql = """
  34. INSERT INTO wp_postmeta(meta_id, post_id, meta_key, meta_value)
  35. VALUES (NULL,'%i','%s','%s')
  36. """ % (post_id, meta_key, meta_value)
  37. cur.execute(sql)
  38. db.commit()
  39. db.close()
  40. def insert_into_table_postsTranslated(ID, post_modified):
  41. db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
  42. cur = db.cursor()
  43. sql = """
  44. INSERT INTO wp_posts_translated(ID, post_modified)
  45. VALUES ('%i','%s')
  46. """ % (ID, post_modified)
  47. cur.execute(sql)
  48. db.commit()
  49. db.close()
  50. def _extract_blocks(parent_tag) -> list:
  51. extracted_blocks = []
  52. for tag in parent_tag:
  53. if tag.name in blocks:
  54. if (tag.name == "img") and ("alt" in tag.attrs):
  55. if tag["alt"]:
  56. response = generate_response(tag["alt"])
  57. if response is not None:
  58. tag["alt"] = response
  59. time.sleep(25)
  60. else:
  61. sys.exit()
  62. else:
  63. response = generate_response(remove_tag(str(tag)))
  64. if response is not None:
  65. s1 = BeautifulSoup(response, 'html.parser')
  66. tag.clear()
  67. tag.append(s1)
  68. time.sleep(25)
  69. else:
  70. sys.exit()
  71. extracted_blocks.append(tag)
  72. continue
  73. if isinstance(tag, Tag):
  74. if len(tag.contents) > 0:
  75. inner_blocks = _extract_blocks(tag)
  76. if len(inner_blocks) > 0:
  77. extracted_blocks.extend(inner_blocks)
  78. return extracted_blocks
  79. if __name__ == "__main__":
  80. pattern_bg = r's:2:"bg";i:(\d+);'
  81. db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
  82. cur = db.cursor()
  83. sql = """
  84. SELECT * FROM wp_posts WHERE (post_status='publish') AND (post_type='post') AND (ID NOT IN (SELECT ID FROM wp_posts_translated)) ORDER BY ID ASC LIMIT 1;
  85. """
  86. cur.execute(sql)
  87. row = cur.fetchone()
  88. if row is not None:
  89. soup = BeautifulSoup(row[4], features="lxml")
  90. extracted_blocks = _extract_blocks(soup.body)
  91. post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))
  92. post_title = ""
  93. response = generate_response(row[5])
  94. if response is not None:
  95. post_title = response
  96. time.sleep(25)
  97. else:
  98. sys.exit()
  99. post_name = transliterate.translit(post_title, 'ru', reversed=True)
  100. post_name = post_name.lower()
  101. post_name = re.sub(r'[^a-z,0-9,-, ]', '', post_name)
  102. post_name = re.sub(r' ', '-', post_name)
  103. guid = ""
  104. #####################
  105. # TABLE posts
  106. #####################
  107. db = MySQLdb.connect(host="DB_SERVER", user="DB_USER", passwd="DB_PASSWORD", db="DB_NAME", charset = 'DB_CHARSET')
  108. cur = db.cursor()
  109. sql = """
  110. INSERT INTO wp_posts(ID, post_author, post_date, post_date_gmt, post_content, post_title, post_excerpt, post_status, comment_status, ping_status, post_password, post_name, to_ping, pinged, post_modified, post_modified_gmt, post_content_filtered, post_parent, guid, menu_order, post_type, post_mime_type, comment_count)
  111. VALUES (NULL,'%s',NOW(),UTC_TIMESTAMP(),'%s','%s','%s','%s','%s','%s','%s','%s','%s','%s',NOW(),UTC_TIMESTAMP(),'%s','%s','%s','%s','%s','%s',0)
  112. """ % (row[1], MySQLdb.escape_string(post_content).decode('utf-8'), MySQLdb.escape_string(post_title).decode('utf-8'), row[6], row[7], row[8], row[9], row[10], post_name, row[12], row[13], row[16], row[17], guid, row[19], row[20], row[21])
  113. cur.execute(sql)
  114. db.commit()
  115. sql = "SELECT LAST_INSERT_ID();"
  116. cur.execute(sql)
  117. tb_posts_id = cur.fetchone()
  118. if tb_posts_id is not None:
  119. guid = "https://adminkin.com/?p=%i" % tb_posts_id[0]
  120. sql = """
  121. UPDATE wp_posts SET guid='%s' WHERE ID='%i'
  122. """ % (guid, tb_posts_id[0])
  123. cur.execute(sql)
  124. db.commit()
  125. ###############################
  126. # TABLE term_taxonomy
  127. ###############################
  128. sql = """
  129. INSERT INTO wp_term_taxonomy(term_taxonomy_id, term_id, taxonomy, description, parent, count)
  130. VALUES (NULL,0,'post_translations','a:2:{s:2:"bg";i:%i;s:2:"ru";i:%i;}',0,2)
  131. """ % (tb_posts_id[0], row[0])
  132. cur.execute(sql)
  133. db.commit()
  134. sql = "SELECT LAST_INSERT_ID();"
  135. cur.execute(sql)
  136. tb_term_taxonomy_id = cur.fetchone()
  137. if tb_term_taxonomy_id is not None:
  138. sql = """
  139. UPDATE wp_term_taxonomy SET term_id='%i' WHERE term_taxonomy_id='%i'
  140. """ % (tb_term_taxonomy_id[0], tb_term_taxonomy_id[0])
  141. cur.execute(sql)
  142. db.commit()
  143. sql = """
  144. INSERT INTO wp_terms(term_id, name, slug, term_group)
  145. VALUES ('%i','pll_6111111111111','pll_6111111111111',0)
  146. """ % (tb_term_taxonomy_id[0])
  147. cur.execute(sql)
  148. db.commit()
  149. ################################
  150. # TABLE term_relationships
  151. ################################
  152. category_id_bg = 107
  153. sql = """
  154. SELECT wp_term_relationships.*, wp_term_taxonomy.taxonomy FROM wp_term_relationships LEFT JOIN wp_term_taxonomy
  155. ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id WHERE wp_term_relationships.object_id='%i'
  156. """ % row[0]
  157. cur.execute(sql)
  158. for tb_term_relationships in cur.fetchall():
  159. if (tb_term_relationships[3] == 'category') or (tb_term_relationships[3] == 'post_tag'):
  160. pattern_ru = r's:2:"ru";i:%i;' % tb_term_relationships[1]
  161. sql = """
  162. SELECT * FROM wp_term_taxonomy WHERE taxonomy LIKE 'term_translations'
  163. """
  164. cur.execute(sql)
  165. for res in cur.fetchall():
  166. search_result = re.search(pattern_ru, res[3])
  167. if search_result:
  168. search_result = re.search(pattern_bg, res[3])
  169. if search_result:
  170. id_bg = int(search_result.group(1))
  171. sql = """
  172. INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
  173. VALUES ('%i','%s','%s')
  174. """ % (tb_posts_id[0], id_bg, tb_term_relationships[2])
  175. cur.execute(sql)
  176. db.commit()
  177. sql = """
  178. UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id='%s'
  179. """ % (id_bg)
  180. cur.execute(sql)
  181. db.commit()
  182. if (tb_term_relationships[3] == 'category'):
  183. category_id_bg = id_bg
  184. break
  185. elif (tb_term_relationships[3] == 'language'):
  186. language_id_bg = 105
  187. sql = """
  188. INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
  189. VALUES ('%i','%i','%s')
  190. """ % (tb_posts_id[0], language_id_bg, tb_term_relationships[2])
  191. cur.execute(sql)
  192. db.commit()
  193. sql = """
  194. UPDATE wp_term_taxonomy SET count=count+1 WHERE term_taxonomy_id='%i'
  195. """ % (language_id_bg)
  196. cur.execute(sql)
  197. db.commit()
  198. sql = """
  199. INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
  200. VALUES ('%i','%i',0)
  201. """ % (tb_posts_id[0], tb_term_taxonomy_id[0])
  202. cur.execute(sql)
  203. db.commit()
  204. sql = """
  205. INSERT INTO wp_term_relationships(object_id, term_taxonomy_id, term_order)
  206. VALUES ('%i','%i',0)
  207. """ % (row[0], tb_term_taxonomy_id[0])
  208. cur.execute(sql)
  209. db.commit()
  210. ########################
  211. # TABLE postmeta
  212. ########################
  213. sql = """
  214. SELECT * FROM wp_postmeta WHERE post_id = '%i'
  215. """ % row[0]
  216. cur.execute(sql)
  217. for tb_postmeta in cur.fetchall():
  218. meta_value = ""
  219. if (tb_postmeta[2] == '_yoast_wpseo_focuskw') or (tb_postmeta[2] == '_yoast_wpseo_title'):
  220. response = generate_response(tb_postmeta[3])
  221. if response is not None:
  222. meta_value = response
  223. time.sleep(25)
  224. meta_value = MySQLdb.escape_string(meta_value).decode('utf-8')
  225. else:
  226. time.sleep(25)
  227. continue
  228. elif (tb_postmeta[2] == '_original_post'):
  229. meta_value = "https://adminkin.com/?p=%i" % tb_posts_id[0]
  230. elif (tb_postmeta[2] == '_yoast_wpseo_primary_category'):
  231. meta_value = "%i" % category_id_bg
  232. else:
  233. meta_value = MySQLdb.escape_string(tb_postmeta[3]).decode('utf-8')
  234. insert_into_table_postmeta(tb_posts_id[0], tb_postmeta[2], meta_value)
  235. ##############################
  236. # TABLE posts_translated
  237. ##############################
  238. insert_into_table_postsTranslated(row[0], row[14])
  239. db.close()