从谷歌翻译器拆分方括号中的字符串

本文关键字:字符串 方括号 拆分 谷歌 翻译器 | 更新日期: 2023-09-27 18:03:46

我正在接收来自Google语言翻译服务的数据,需要帮助拆分数据。

// Requests a sample translation of one English ("en") sentence into French ("fr").
void Start()
{
    const string sampleText = "Hello, This is a test!";
    const string sourceLanguage = "en";
    const string targetLanguage = "fr";

    translateText(sampleText, sourceLanguage, targetLanguage);
}
/// <summary>
/// Builds the translation request URL for <paramref name="text"/> and starts the
/// coroutine that downloads and decodes the response.
/// </summary>
/// <param name="text">Text to translate; percent-encoded before being placed in the query string.</param>
/// <param name="fromLanguage">Source language code (the sample call passes "en").</param>
/// <param name="toLanguage">Target language code (the sample call passes "fr").</param>
void translateText(string text, string fromLanguage, string toLanguage)
{
    // EscapeDataString (unlike EscapeUriString) also encodes reserved characters such as
    // '&', '#', '+' and '=', which would otherwise truncate or corrupt the "q" parameter.
    string url = "https://translate.googleapis.com/translate_a/single?client=gtx"
        + "&sl=" + Uri.EscapeDataString(fromLanguage)
        + "&tl=" + Uri.EscapeDataString(toLanguage)
        + "&dt=t&q=" + Uri.EscapeDataString(text);
    StartCoroutine(startTranslator(url));
}
/// <summary>
/// Coroutine that downloads <paramref name="url"/>, decodes the response and logs
/// the original text, the translated text and the language code.
/// </summary>
/// <param name="url">Fully built request URL.</param>
IEnumerator startTranslator(string url)
{
    // UnityWebRequest is disposable; release it once the coroutine is done with it.
    using (UnityWebRequest www = UnityWebRequest.Get(url))
    {
        yield return www.Send();

        // A failed request leaves nothing meaningful to decode.
        if (!string.IsNullOrEmpty(www.error))
        {
            Debug.LogError("Translation request failed: " + www.error);
            yield break;
        }

        string rawResponse = www.downloadHandler.text;
        Debug.Log("Raw string Received: " + rawResponse);

        // decodeResult returns null when the response yields fewer than four pieces.
        LanguageResult tempResult = decodeResult(rawResponse);
        if (tempResult == null)
        {
            Debug.LogWarning("Could not decode translation response: " + rawResponse);
            yield break;
        }

        Debug.Log("Original Text: " + tempResult.originalText);
        Debug.Log("Translated Text: " + tempResult.translatedText);
        Debug.Log("LanguageIso: " + tempResult.languageIso);
    }
}
/// <summary>
/// Splits the raw response on brackets, double quotes and commas and maps the first
/// four non-empty pieces onto a <see cref="LanguageResult"/>.
/// </summary>
/// <param name="result">Raw response text.</param>
/// <returns>The populated result, or null when fewer than four pieces are found.</returns>
LanguageResult decodeResult(string result)
{
    // NOTE: ',' is one of the delimiters, so a comma inside the translated or original
    // text splits that text into several pieces and shifts the fields assigned below.
    char[] delims = { '[', '"', ']', ',' };
    string[] arr = result.Split(delims, StringSplitOptions.RemoveEmptyEntries);
    if (arr.Length < 4)
    {
        return null;
    }

    return new LanguageResult
    {
        translatedText = arr[0],
        originalText = arr[1],
        unknowValue = arr[2],
        languageIso = arr[3]
    };
}
/// <summary>Holds the four values extracted from one translation response.</summary>
public class LanguageResult
{
    /// <summary>First extracted piece of the response; the translated text.</summary>
    public string translatedText;
    /// <summary>Second extracted piece of the response; the text that was submitted.</summary>
    public string originalText;
    /// <summary>Third extracted piece; its meaning is not established here (the sample response shows 0).</summary>
    public string unknowValue;
    /// <summary>Fourth extracted piece; presumably a language code ("en" in the sample response) - TODO confirm.</summary>
    public string languageIso;
}

然后用 Start() 函数的translateText("Hello, This is a test!", "en", "fr");调用它,该函数使用 ISO 639-1 代码将英语句子转换为法语。

接收到的数据如下所示:

[[["Bonjour, Ceci est un test!","Hello, This is a test!",,,0]],,"en"]

我想像这样拆分它:

  • Bonjour, Ceci est un test!
  • Hello, This is a test!
  • 0
  • en

并按顺序将它们放入字符串数组中。

我目前使用这个:

// Split on brackets, double quotes and commas, dropping the empty pieces.
char[] delims = { '[', '"', ']', ',' };
string[] arr = result.Split(delims, StringSplitOptions.RemoveEmptyEntries);

如果收到的字符串中没有逗号,则此方法有效。如果有逗号,则拆分的值会弄乱。 拆分它的最佳方法是什么?

编辑

使用Blorgbeard的解决方案,最终的工作代码如下。希望这会帮助其他人。这不应该用于商业目的,而应该用于个人或学校项目。

// Requests a sample translation of one English ("en") sentence into French ("fr").
void Start()
{
    // Alternative inputs containing quotes, backslashes and commas
    // (escape sequences reconstructed from the garbled original comments):
    //   translateText("Hello, This is \" / \\ a test !", "en", "fr");
    //   translateText("Hello, This is , \\ \" a test !", "en", "fr");
    const string sampleText = "Hello, This is a test!";
    const string sourceLanguage = "en";
    const string targetLanguage = "fr";

    translateText(sampleText, sourceLanguage, targetLanguage);
}
/// <summary>
/// Builds the translation request URL for <paramref name="text"/> and starts the
/// coroutine that downloads and decodes the response.
/// </summary>
/// <param name="text">Text to translate; percent-encoded before being placed in the query string.</param>
/// <param name="fromLanguage">Source language code (the sample call passes "en").</param>
/// <param name="toLanguage">Target language code (the sample call passes "fr").</param>
void translateText(string text, string fromLanguage, string toLanguage)
{
    // EscapeDataString (unlike EscapeUriString) also encodes reserved characters such as
    // '&', '#', '+' and '=', which would otherwise truncate or corrupt the "q" parameter.
    string url = "https://translate.googleapis.com/translate_a/single?client=gtx"
        + "&sl=" + Uri.EscapeDataString(fromLanguage)
        + "&tl=" + Uri.EscapeDataString(toLanguage)
        + "&dt=t&q=" + Uri.EscapeDataString(text);
    StartCoroutine(startTranslator(url));
}
/// <summary>
/// Coroutine that downloads <paramref name="url"/>, decodes the response and hands
/// the decoded result to displayResult.
/// </summary>
/// <param name="url">Fully built request URL.</param>
IEnumerator startTranslator(string url)
{
    // UnityWebRequest is disposable; release it once the coroutine is done with it.
    using (UnityWebRequest www = UnityWebRequest.Get(url))
    {
        yield return www.Send();

        // A failed request leaves nothing meaningful to decode.
        if (!string.IsNullOrEmpty(www.error))
        {
            Debug.LogError("Translation request failed: " + www.error);
            yield break;
        }

        string rawResponse = www.downloadHandler.text;
        Debug.Log("Raw string Received: " + rawResponse);

        // decodeResult returns null when fewer than four values could be extracted.
        LanguageResult tempResult = decodeResult(rawResponse);
        if (tempResult == null)
        {
            Debug.LogWarning("Could not decode translation response: " + rawResponse);
            yield break;
        }

        displayResult(tempResult);
    }
}
/// <summary>
/// Logs the original text, translated text and language code of a decoded result.
/// </summary>
/// <param name="translationResult">Decoded result; may be null when decoding failed.</param>
void displayResult(LanguageResult translationResult)
{
    // decodeResult returns null for responses with fewer than four values,
    // so guard against it instead of throwing a NullReferenceException.
    if (translationResult == null)
    {
        Debug.LogWarning("No translation result to display.");
        return;
    }

    Debug.Log("Original Text: " + translationResult.originalText);
    Debug.Log("Translated Text: " + translationResult.translatedText);
    Debug.Log("LanguageIso: " + translationResult.languageIso);
}
/// <summary>
/// Tokenizes the raw response with Decode and maps the first four tokens onto a
/// <see cref="LanguageResult"/>.
/// </summary>
/// <param name="result">Raw response text.</param>
/// <returns>The populated result, or null when fewer than four tokens are found.</returns>
LanguageResult decodeResult(string result)
{
    string[] tokens = Decode(result);
    if (tokens.Length < 4)
    {
        return null;
    }

    return new LanguageResult
    {
        translatedText = tokens[0],
        originalText = tokens[1],
        unknowValue = tokens[2],
        languageIso = tokens[3]
    };
}
/// <summary>Holds the four values extracted from one translation response.</summary>
public class LanguageResult
{
    /// <summary>First decoded token of the response; the translated text.</summary>
    public string translatedText;
    /// <summary>Second decoded token of the response; the text that was submitted.</summary>
    public string originalText;
    /// <summary>Third decoded token; its meaning is not established here (the sample response shows 0).</summary>
    public string unknowValue;
    /// <summary>Fourth decoded token; presumably a language code ("en" in the sample response) - TODO confirm.</summary>
    public string languageIso;
}
/// <summary>
/// Tokenizes a bracketed, comma-separated response into its values.
/// Double-quoted strings become one token each (commas and brackets inside them are
/// kept); runs of characters between separators outside quotes become one token each.
/// </summary>
/// <param name="input">Raw response text.</param>
/// <returns>The tokens in the order they appear in <paramref name="input"/>.</returns>
private string[] Decode(string input)
{
    // Characters that end an unquoted token: comma, brackets and the double quote.
    const string separators = ",[]\"";

    List<string> finalResult = new List<string>();
    bool inToken = false;
    bool inString = false;
    bool escaped = false;
    string current = "";

    foreach (char chr in input)
    {
        if (inString)
        {
            if (escaped)
            {
                // The character after a backslash is taken literally; escape
                // sequences such as \n or \uXXXX are not translated.
                current += chr;
                escaped = false;
            }
            else if (chr == '\\')
            {
                escaped = true;
            }
            else if (chr == '"')
            {
                // Closing quote: the string is complete.
                finalResult.Add(current);
                current = "";
                inString = false;
            }
            else
            {
                current += chr;
            }
            continue;
        }

        if (chr == '"')
        {
            // Opening quote: start collecting a new string.
            current = "";
            inString = true;
            continue;
        }

        bool isSeparator = separators.IndexOf(chr) >= 0;
        if (inToken)
        {
            if (isSeparator)
            {
                finalResult.Add(current);
                current = "";
                inToken = false;
            }
            else
            {
                current += chr;
            }
        }
        else if (!isSeparator)
        {
            // First character of an unquoted token.
            inToken = true;
            current = chr.ToString();
        }
        // Separators outside of any token carry no data and are skipped.
    }

    // Emit an unquoted token that runs to the end of the input instead of dropping it.
    if (inToken && current.Length > 0)
    {
        finalResult.Add(current);
    }

    return finalResult.ToArray();
}

从谷歌翻译器拆分方括号中的字符串

你可以自己编写一个简单的解析器。这是我拼凑的一个(代码还可以再整理一下,但足以演示这个思路):

/// <summary>
/// Lazily tokenizes a bracketed, comma-separated response into its values.
/// Double-quoted strings become one token each (commas and brackets inside them are
/// kept); runs of characters between separators outside quotes become one token each.
/// </summary>
/// <param name="input">Raw response text.</param>
/// <returns>The tokens in the order they appear in <paramref name="input"/>.</returns>
private static IEnumerable<string> Parse(string input) {
    // Characters that end an unquoted token: comma, brackets and the double quote.
    const string separators = ",[]\"";

    bool inToken = false;
    bool inString = false;
    bool escaped = false;
    string current = "";

    foreach (char chr in input) {
        if (inString) {
            if (escaped) {
                // The character after a backslash is taken literally; escape
                // sequences such as \n or \uXXXX are not translated.
                current += chr;
                escaped = false;
            } else if (chr == '\\') {
                escaped = true;
            } else if (chr == '"') {
                // Closing quote: the string is complete.
                yield return current;
                current = "";
                inString = false;
            } else {
                current += chr;
            }
            continue;
        }

        if (chr == '"') {
            // Opening quote: start collecting a new string.
            current = "";
            inString = true;
            continue;
        }

        bool isSeparator = separators.IndexOf(chr) >= 0;
        if (inToken) {
            if (isSeparator) {
                yield return current;
                current = "";
                inToken = false;
            } else {
                current += chr;
            }
        } else if (!isSeparator) {
            // First character of an unquoted token.
            inToken = true;
            current = chr.ToString();
        }
        // Separators outside of any token carry no data and are skipped.
    }

    // Emit an unquoted token that runs to the end of the input instead of dropping it.
    if (inToken && current.Length > 0) {
        yield return current;
    }
}

这是一个jsfiddle演示。

使用Regex.Split您可以执行以下操作,例如:

using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demonstrates splitting the sample response with a single regular expression and
/// printing only the pieces that start with a letter or a digit.
/// </summary>
public class Example
{
    public static void Main()
    {
        var input = "[[[\"Bonjour, Ceci est un test!\",\"Hello, This is a test!\",,,0]],,\"en\"]";

        // Split on brackets, on a non-letter/non-space character followed by a comma
        // (which leaves commas inside the sentences alone), and on double quotes.
        var parse = Regex.Split(input, "\\[|\\]|[^a-zA-Z ],|\",\"|\"|\"");

        foreach (var item in parse)
        {
            // Regex.Split also yields empty pieces; keep only real values.
            bool result = !String.IsNullOrEmpty(item) && (Char.IsLetter(item[0]) || Char.IsDigit(item[0]));
            if (result)
            {
                Console.WriteLine(item);
            }
        }
    }
}

输出:

Bonjour, Ceci est un test!
Hello, This is a test!
0
en

如果您想要拆分得到的全部内容(包括空字符串),只需去掉对首字符是否为字母或数字的 bool 检查即可。

这是一个疯狂的想法:先按双引号 " 拆分,再把引号之外的部分按方括号和逗号拆分(但如果字符串内部还包含 " 则不起作用):

// Split on double quotes first: odd-numbered pieces are the quoted strings and are kept
// whole, even-numbered pieces are split further on brackets and commas.
var s = @"[[[""Bonjour, Ceci est un test!"",""Hello, This is a test!"",,,0]],,""en""]";
char[] outerSeparators = "[],".ToCharArray();
var a = s.Split('"')
         .SelectMany((piece, index) => index % 2 == 1
             ? new[] { piece }
             : piece.Split(outerSeparators, StringSplitOptions.RemoveEmptyEntries))
         .ToArray();
Debug.Print(string.Join("|", a)); // "Bonjour, Ceci est un test!|Hello, This is a test!|0|en"

您可以尝试正则表达式进行拆分。我用您提供的样本进行了测试。结果是这样的。

    // Splits on every '[', ']' and ',' - including commas inside the quoted text -
    // and keeps both the empty pieces and the surrounding double quotes.
    var str = "[[[\"Bonjour, Ceci est un test!\",\"Hello, This is a test!\",,,0]],,\"en\"]";
    var splitted = Regex.Split(str, @"\[|\]|\,");
    foreach (var split in splitted)
    {
        Console.WriteLine(split);
    }
   "Bonjour Ceci est un test!"
   "Hello This is a test!"
    0
   "en"