Explorar el Código

1) Added transformation of the href attribute of <a> tags: links to Russian-language articles on adminkin.com are replaced with links to the corresponding Bulgarian-language articles on adminkin.com. 2) Fixed an error in generating the URL for the BG translation of the article. 3) OpenAI sometimes adds double quotes at the beginning and end of its response; these unnecessary double quotes are now removed.

gogsadmin hace 1 año
padre
commit
cfb6d553f1
Se ha modificado 1 fichero con 33 adiciones y 3 borrados
  1. 33 3
      translator.py

+ 33 - 3
translator.py

@@ -12,6 +12,30 @@ import openai
 
 blocks = ["p", "h1", "h2", "h3", "h4", "h5", "img", "li"]
 
+def href_rus_to_bg(db, cur, pattern_bg, text):
+    pattern_url = r'https://adminkin.com/([^/]+)'
+    for rus_postname in re.findall(pattern_url, text):
+        sql = """
+        SELECT wp_term_taxonomy.taxonomy, wp_term_taxonomy.description FROM wp_posts 
+        LEFT JOIN wp_term_relationships ON wp_posts.ID=wp_term_relationships.object_id 
+        LEFT JOIN wp_term_taxonomy ON wp_term_relationships.term_taxonomy_id=wp_term_taxonomy.term_taxonomy_id 
+        WHERE wp_posts.post_name LIKE '%s';
+        """ % rus_postname
+        cur.execute(sql)
+        for row in cur.fetchall():
+            if (row[0] == 'post_translations'):
+                search_result = re.search(pattern_bg, row[1])
+                if search_result:
+                    bg_postid = int(search_result.group(1))
+                    sql = "SELECT post_name FROM wp_posts WHERE ID = '%i'" % bg_postid
+                    cur.execute(sql)
+                    row2 = cur.fetchone()
+                    if row2 is not None:
+                        bg_postname = "bg/%s" % row2[0]
+                        text = re.sub(rus_postname, bg_postname, text)
+                        break
+    return text
+
 def generate_response(prompt):
     openai.api_key = "OPENAI_API_KEY"
     prompt = "Требуется только перевод с русского языка на болгарский язык. Если в оригинале есть английские слова, то они должны присутствовать в переводе. Если в оригинале встречается html разметка, то вставлять её в соответствующих местах перевода. Никакие пояснения к переводу не требуются. Переведи: \"" + prompt + "\""
@@ -25,7 +49,12 @@ def generate_response(prompt):
     )
 
     if 'choices' in response and len(response.choices) > 0:
-        return response.choices[0].message.content
+        text = response.choices[0].message.content
+        if text.startswith('"'):
+            text = text[1:]
+        if text.endswith('"'):
+            text = text[:-1]
+        return text
     else:
         return None
 
@@ -98,7 +127,8 @@ if __name__ == "__main__":
     cur.execute(sql)
     row = cur.fetchone()
     if row is not None:
-        soup = BeautifulSoup(row[4], features="lxml")
+        text = href_rus_to_bg(db, cur, pattern_bg, row[4])
+        soup = BeautifulSoup(text, features="lxml")
         extracted_blocks = _extract_blocks(soup.body)
         post_content = re.sub(r'(<html><body>|</body></html>)', '', str(soup))
 
@@ -112,7 +142,7 @@ if __name__ == "__main__":
 
         post_name = transliterate.translit(post_title, 'ru', reversed=True)
         post_name = post_name.lower()
-        post_name = re.sub(r'[^a-z,0-9,-, ]', '', post_name)
+        post_name = re.sub(r'[^a-z0-9\- ]', '', post_name)
         post_name = re.sub(r' ', '-', post_name)
 
         guid = ""