Phrase TMS API

Analytics Aggregations (TMS)

文本由 Phrase Language AI 从英语机器翻译而得。

运行聚合查询的数据存储建立在Elasticsearch之上(当前版本1.7.x)。Elasticsearch 提供实时搜索和聚合能力,分析 API 在其上构建了一个轻量且安全增强的层。在示例中使用了Elasticsearch查询和聚合语言。

索引

不同类型的数据存储在不同的索引中。例如,工作数据存储在 jobPart 索引中,而成本数据存储在 costs 索引中。所需的数据可以通过在聚合 API 端点 URL 中指定索引来获取。

父子文档

关于工作的部分信息存储在Elasticsearch数据库中的两个不同文档中。这些文档处于父子关系中。例如,关于工作项目的信息存储在父文档中,而关于工作本身的信息存储在子文档中。根据所需的信息,聚合可以在数据的父子聚合之前或之后进行,但数据聚合本身应在所有查询中存在。

索引数据模型

常见数据

分析

{
  "id": string,
  "createdBy": <User>,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "innerId": number,
  "name": string,
  "tags": string[],
  "trashed": boolean,
  "trashedBy": <User>,
  "type": string
}

分配

{
  "name": string,
  "linguist": <User>,
  "vendor": <Vendor>
}

Submitter portal (AutomationWidget)

{
  "id": string,
  "createdBy": <User>,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "name": string,
  "tags": string[],
  "trashed": boolean,
  "urlId": string
}

买方

{
  "id": string,
  "name": string
}

客户

{
  "id": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "externalId": string,
  "name": string,
  "tags": string[],
  "trashed": boolean
}

成本中心

{
  "id": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "name": string,
  "tags": string[],
  "trashed": boolean
}

领域

{
  "id": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "name": string,
  "tags": string[],
  "trashed": boolean
}

工作

{
  "id": string,
  "createdBy": <User>,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "extension": string,
  "fileName": string,
  "groupCount": number,
  "innerId": string,
  "languagePair": string,
  "lastModified": Date,
  "localePair": string,
  "sourceLanguage": string,
  "sourceLocale": string,
  "tags": string[],
  "targetLanguage": string,
  "targetLocale": string,
  "taskId": string,
  "trashed": boolean,
  "trashedBy": <User>,
  "uid": string
}

JobPart

{
  "id": string,
  "assignedTo": <Assignment>,
  "beginIndex": number,
  "buyer": <Buyer>,
  "createdBy": <User>,
  "dateCreated": Date,
  "dateDue": Date,
  "endIndex": number,
  "groupCount": number,
  "innerId": string,
  "lastModified": Date,
  "level": number,
  "status": string,
  "tags": string[],
  "uid": string,
  "workflowStep": <WorkflowStep>
}

机器翻译引擎

{
  "id": string,
  "deleted": boolean,
  "default_": boolean,
  "includeTags": boolean,
  "name": string,
  "tags": string[],
  "type": string
}

NetRateScheme

{
  "id": string,
  "createdBy": <User>,
  "dateCreated": Date,
  "dateDeleted": Date,
  "default_": boolean,
  "deleted": boolean,
  "externalId": string,
  "name": string,
  "tags": string[]
}

PriceList

{
  "id": string,
  "createdBy": <User>,
  "currency": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "default_": boolean,
  "deleted": boolean,
  "name": string,
  "tags": string[],
  "trashed": boolean,
  "unit": string
}

项目

{
  "id": string,
  "buyer": <Buyer>,
  "client": <Client>,
  "costCenter": <CostCenter>,
  "createdBy": <User>,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateDue": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "domain": <Domain>,
  "innerId": number,
  "languagePairs": string[],
  "localePairs": string[],
  "mtEngine": <MtEngine>,
  "name": string,
  "note": string,
  "owner": <User>,
  "sourceLanguage": string,
  "sourceLocale": string,
  "status": string,
  "subDomain": <SubDomain>,
  "tags": string[],
  "targetLanguages": string[],
  "targetLocales": string[],
  "trashed": boolean,
  "trashedBy": <User>,
  "uid": string,
  "vendor": <Vendor>
}

报价

{
  "id": string,
  "createdBy": <User>,
  "currency": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "innerId": string,
  "name": string,
  "status": string,
  "tags": string[],
  "trashed": boolean,
  "trashedBy": <User>,
  "unit": string
}

服务

{
  "id": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "name": string,
  "publicName": string,
  "tags": string[],
  "trashed": boolean,
  "type": string
}

子领域

{
  "id": string,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "name": string,
  "tags": string[],
  "trashed": boolean
}

用户

{
  "id": number,
  "active": boolean,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "email": string,
  "firstName": string,
  "fullName": string,
  "innerId": number,
  "jobTitle": string,
  "lastName": string,
  "locale": string,
  "note": string,
  "role": string,
  "tags": string[],
  "timeZone": string,
  "trashed": boolean,
  "userName": string
}

供应商

{
  "id": string,
  "candidate": boolean,
  "dateCreated": Date,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "deleted": boolean,
  "tags": string[],
  "token": string,
  "trashed": boolean
}

工作流步骤

{
  "id": string,
  "abbreviation": string,
  "dateDeleted": Date,
  "dateTrashed": Date,
  "name": string,
  "order": number,
  "tags": string[],
  "trashed": boolean
}

分析索引

数据聚合

"data": {
  "children": {
    "type": "analysisType"
  }
}

父文档

{
  "automationWidget": <AutomationWidget>,
  "project": <Project>,
  "service": <Service>
}

子文档

{
  "analysis": <Analysis>,
  "jobPart": <JobPart>,
  "job": <Job>,
  "netRateScheme": <NetRateScheme>,

  "priority": number,
  "data": {
    "machineTranslation": {
      "match0": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match100": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match50": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match75": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match85": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match95": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      }
    },
    "repetitions": {
      "characters": number,
      "pages": number,
      "percent": number,
      "segments": number,
      "words": number
    },
    "translationMemory": {
      "match0": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match100": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match101": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match50": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match75": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match85": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      },
      "match95": {
        "characters": number,
        "pages": number,
        "percent": number,
        "segments": number,
        "words": number
      }
    },
    "total": {
      "characters": number,
      "pages": number,
      "percent": number,
      "segments": number,
      "words": number
    }
  }
}

API索引

数据聚合

"data": {
  "children": {
    "type": "apiType"
  }
}

父文档

{
  "user": <User>
}

子文档

{
  "request": {
    "date": Date,
    "host": string,
    "ipv4": string,
    "ipv6": string,
    "location": string,
    "method": string
  },
  "response": {
    "date": Date,
    "duration": number,
    "status": number
  },
  "api": {
    "action": string,
    "asynch": boolean,
    "type": string,
    "uri": string,
    "ver": string
  }
}

成本索引

数据聚合

"data": {
  "children": {
    "type": "costsType"
  }
}

父文档

{
  "automationWidget": <AutomationWidget>,
  "project": <Project>,
  "service": <Service>
}

子文档

{
  "analysis": <Analysis>,
  "job": <Job>,
  "jobPart": <JobPart>,
  "netRateScheme": <NetRateScheme>,
  "priceList": <PriceList>,
  "quote": <Quote>,
  "workflowStep": <WorkflowStep>,

  "priority": number,
  "data": {
    "machineTranslation": {
      "match0": number,
      "match100": number,
      "match50": number,
      "match75": number,
      "match85": number,
      "match95": number
    },
    "repetitions": number,
    "translationMemory": {
      "match0": number,
      "match100": number,
      "match101": number,
      "match50": number,
      "match75": number,
      "match85": number,
      "match95": number
    },
    "total": number
  }
}

JobPart 索引

数据聚合

"data": {
  "children": {
    "type": "jobPartType"
  }
}

父文档

{
  "automationWidget": <AutomationWidget>,
  "project": <Project>,
  "service": <Service>
}

子文档

{
  "job": <Job>,
  "jobPart": <JobPart>,

  "data": {
    "counts": {
      "chars": {
        "total": number,
        "confirmed": number,
        "notConfirmed": number,
        "locked": number,
        "notLocked": number,
        "confirmedLocked": number,
        "notConfirmedLocked": number,
        "completed": number,
        "notCompleted": number
      },
      "groups": {
        "total": number
      },
      "segments": {
        "total": number,
        "confirmed": number,
        "notConfirmed": number,
        "locked": number,
        "notLocked": number,
        "confirmedLocked": number,
        "notConfirmedLocked": number,
        "completed": number,
        "notCompleted": number,
        "machineTranslation": {
          "postEdited": number,
          "relevant": number,
          "notRelevant": number
        },
        "QA": {
          "checked": number,
          "notChecked": number
        }
      },
      "words": {
        "total": number,
        "confirmed": number,
        "notConfirmed": number,
        "locked": number,
        "notLocked": number,
        "confirmedLocked": number,
        "notConfirmedLocked": number,
        "completed": number,
        "notCompleted": number
      },
      "QA": {
        "warnings": number,
        "ignoredWarnings": number,
        "notIgnoredWarnings": number
      }
    }
  }
}

示例

  • HTTP 请求在 Postman 中运行。

  • 通过项目经理或管理员个人资料登录,并使用登录 API 获取登录令牌。

  • 向 api/v3/analytics/jobPart?token=<your_login_token> 发送 POST 请求。

工作总数

查询

{
  "aggregations": {
    "data": {
      "children": {
        "type": "jobPartType"
      }
    }
  }
}

响应

{
  "hits": {
    "total":359
  },
  "aggregations": {
    "data": {
      "doc_count":14417
    }
  }
}

在 aggregations 字段下,data 聚合返回了 14,417 个匹配的文档,该数量显示在 doc_count 字段中。

总源词数量

查询

{
  "aggregations": {
    "data": {
      "children": {
        "type": "jobPartType"
      },
      "aggs": {
        "wordCount": {
          "sum": {
            "field": "data.volume.words"
          }
        }
      }
    }
  }
}

响应

{
  "hits": {
    "total":359
  },
  "aggregations": {
    "data": {
      "doc_count":14417,
      "wordCount": {
        "value":6893067
      }
    }
  }
}

按译文语言划分的原文词总数

查询

{
  "aggregations": {
    "data": {
      "children": {
        "type": "jobPartType"
      },
      "aggs": {
        "byTargetLanguage": {
          "terms": {
            "field": "job.targetLanguage",
            "size":3
          },
          "aggs": {
            "wordCount": {
              "sum": {
                "field": "data.volume.words"
              }
            }
          }
        }
      }
    }
  }
}

响应

{
  "hits": {
    "total":359
  },
  "aggregations": {
    "data": {
      "byTargetLanguage": {
        "doc_count_error_upper_bound":0,
        "sum_other_doc_count":10071,
        "buckets": [
          {
            "doc_count":1835,
            "wordCount": {
              "value":702721
            },
            "key": "cs"
          },
          {
            "doc_count":1491,
            "wordCount": {
              "value":2602529
            },
            "key": "de"
          },
          {
            "doc_count":1020,
            "wordCount": {
              "value":92676
            },
            "key": "fi"
          }
        ]
      },
      "doc_count":14417
    }
  }
}

buckets 实体在响应中显示。当分析模块被要求按某个类别拆分数据时,结果会以一个桶列表的形式展示。每个桶包含一个键,用于定义该桶所代表的数据(在本例中为译文语言),以及一个只针对数据集该部分的值(其他聚合)。

按项目状态拆分的工作总数

查询

{
  "aggregations": {
    "projectStatus": {
      "terms": {
        "field": "project.status"
      },
      "aggs": {
        "data": {
          "children": {
            "type": "jobPartType"
          }
        }
      }
    }
  }
}

响应

{
  "hits": {
    "total":359
  },
  "aggregations": {
    "projectStatus": {
      "doc_count_error_upper_bound":0,
      "sum_other_doc_count":0,
      "buckets": [
        {
          "doc_count":326,
          "data": {
            "doc_count":14318
          },
          "key":"NEW"
        },
        {
          "doc_count":31,
          "data": {
            "doc_count":89
          },
          "key":"COMPLETED"
        },
        {
          "doc_count":1,
          "data": {
            "doc_count":4
          },
          "key":"ASSIGNED"
        },
        {
          "doc_count":1,
          "data": {
            "doc_count":6
          },
          "key":"DECLINED_BY_VENDOR"
        }
      ]
    }
  }
}

分析模块已被告知在数据聚合之前按项目状态拆分数据。

来自 NEW 项目的工作总数

查询

{
  "filter": {
    "term": {
      "project.status":"NEW"
    }
  },
  "aggregations": {
    "data": {
      "children": {
        "type": "jobPartType"
      }
    }
  }
}

响应

{
  "hits": {
    "total":326
  },
  "aggregations": {
    "data": {
      "doc_count":14318
    }
  }
}

查询中使用了 filter 字段。这样可以在后续聚合之前缩小数据范围。

这篇文章有帮助吗?

Sorry about that! In what way was it not helpful?

The article didn’t address my problem.
I couldn’t understand the article.
The feature doesn’t do what I need.
Other reason.

Note that feedback is provided anonymously so we aren't able to reply to questions.
If you'd like to ask a question, submit a request to our Support team.
Thank you for your feedback.