运行聚合查询的数据存储建立在Elasticsearch之上(当前版本1.7.x)。Elasticsearch 提供实时搜索和聚合能力,分析 API 在其上构建了一个轻量且安全增强的层。在示例中使用了Elasticsearch查询和聚合语言。
- 查询 JSON 的 filter 字段中使用的语言在 Elasticsearch Query DSL 文档中进行了描述。
- 查询 JSON 的 aggregations 字段中使用的语言在 Elasticsearch Aggregations 文档中进行了详细描述。
索引
不同类型的数据存储在不同的索引中。例如,工作 数据存储在 jobPart 索引中,而 costs 数据存储在 costs 索引中。所需的数据可以通过在聚合 API 端点 URL 中指定索引来获取。
父子文档
关于工作的部分信息存储在Elasticsearch数据库中的两个不同文档中。这些文档处于父子关系中。例如,关于工作项目的信息存储在父文档中,而关于工作本身的信息存储在子文档中。根据所需的信息,聚合可以在数据的父子聚合之前或之后进行,但数据聚合本身应在所有查询中存在。
常见数据
分析
{
"id": string,
"createdBy": <User>,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"innerId": number,
"name": string,
"tags": string[],
"trashed": boolean,
"trashedBy": <User>,
"type": string
}
分配
{
"name": string,
"linguist": <User>,
"vendor": <Vendor>
}
Submitter portal
{
"id": string,
"createdBy": <User>,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean,
"urlId": string
}
买方
{
"id": string,
"name": string
}
客户
{
"id": string,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"externalId": string,
"name": string,
"tags": string[],
"trashed": boolean,
}
成本中心
{
"id": string,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean,
}
领域
{
"id": string,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean
}
工作
{
"id": ID,
"createdBy": <User>,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"extension": string,
"fileName": string,
"groupCount": number,
"innerId":"1",
"languagePair": string,
"lastModified":日期,
"localePair": string,
"sourceLanguage": string,
"sourceLocale": string,
"tags": string[],
"targetLanguage": string,
"targetLocale": string",
"taskId": string,
"trashed": boolean,
"trashedBy": <User>,
"uid": string
}
JobPart
{
"id":"19",
"assignedTo": <Assignment>,
"beginIndex": number,
"buyer": <Buyer>,
"createdBy": <User>,
"dateCreated":日期,
"dateDue":日期,
"endIndex": number,
"groupCount": number,
"innerId": 字符串,
"lastModified":日期,
"level": number,
"status": 字符串,
"tags": [],
"uid": 字符串,
"workflowStep": <WorkflowStep>
}
机器翻译引擎
{
"ID": 字符串,
"deleted": boolean,
"default_": boolean,
"includeTags": boolean,
"name": string,
"tags": string[],
"type": string,
}
NetRateScheme
{
"id": string,
"createdBy": <User>,
"dateCreated":日期,
"dateDeleted":日期,
"default_": boolean,
"deleted": boolean,
"externalId": string,
"name": string,
"tags": string[],
}
PriceList
{
"id": string,
"createdBy": <User>,
"currency": string,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"default_": boolean,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean,
"unit": string
}
项目
{
"id": string,
"buyer": <Buyer>,
"client": <Client>,
"costCenter": <CostCenter>,
"createdBy": <User>,
"dateCreated":日期,
"dateDeleted":日期,
"dateDue":日期,
"dateTrashed":日期,
"deleted": boolean,
"domain": <Domain>,
"innerId": number,
"languagePairs": string[],
"localePairs": string[],
"mtEngine": <MtEngine>,
"name": string,
"note": string,
"owner": <User>,
"sourceLanguage": string,
"sourceLocale": string,
"status": string,
"subDomain": <SubDomain>,
"tags": string[],
"targetLanguages": string[],
"targetLocales": string[],
"trashed": boolean,
"trashedBy": <User>,
"uid": string,
"vendor": <Vendor>
}
报价
{
"id": string,
"createdBy": <User>,
"currency": string,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"innerId": string,
"name": 字符串,
"status": 字符串,
"tags": 字符串[],
"trashed": boolean,
"trashedBy": <User>,
"unit": 字符串
}
服务
{
"id": 字符串,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"name": 字符串,
"publicName": 字符串,
"tags": 字符串[],
"trashed": boolean,
"type": 字符串
}
子领域
{
"id": 字符串,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"name": 字符串,
"tags": 字符串[],
"trashed": boolean
}
用户
{
"id": number,
"active": boolean,
"dateCreated":日期,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"email": 字符串,
"firstName": string,
"fullName": string,
"innerId": number,
"jobTitle": string,
"lastName": string,
"区域": string,
"注释": string,
"role": string,
"标记": string[],
"timeZone": string,
"trashed": boolean,
"userName": string
}
供应商
{
"id": string,
"candidate": boolean,
"dateCreated":Date,
"dateDeleted":日期,
"dateTrashed":日期,
"deleted": boolean,
"tags": 标记[],
"token": string,
"trashed": boolean
}
工作流步骤
{
"id": string,
"abbreviation": string,
"dateDeleted":日期,
"dateTrashed":日期,
"name": string,
"订单": number,
"标记": string[],
"trashed": boolean
}
分析索引
数据聚合
"data": {
"子": {
"类型": "analysisType"
}
}
父文档
{
"automationWidget": <AutomationWidget>,
"project": <Project>,
"service": <Service>
}
子文档
{
"analysis": <Analysis>,
"jobPart": <JobPart>,
"job": <Job>,
"netRateScheme": <NetRateScheme>,
"priority": number,
"data": {
"机器翻译": {
"match0": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match100": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match50": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match75": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match85": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match95": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
}
},
"重复": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"翻译记忆库": {
"match0": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match100": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match101": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match50": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match75": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match85": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
},
"match95": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
}
},
"total": {
"字符": number,
"pages": number,
"percent": number,
"句段": number,
"words": number
}
}
}
API索引
数据聚合
"data": {
"子": {
"类型": "apiType"
}
}
父文档
{
"user": <User>
}
子文档
{
"request": {
"date":Date,
"host": string,
"ipv4": string,
"ipv6": string,
"location": string,
"method": string
},
"response": {
"date":日期,
"duration": number,
"status": number
},
"api": {
"action": string,
"asynch": boolean,
"类型": string,
"uri": string,
"ver": string
}
}
成本索引
数据聚合
"data": {
"子": {
"类型": "costsType"
}
}
父文档
{
"automationWidget": <AutomationWidget>,
"project": <Project>,
"service": <Service>
}
子文档
{
"analysis": <Analysis>,
"job": <Job>,
"jobPart": <JobPart>,
"netRateScheme": <NetRateScheme>,
"priceList": <PriceList>,
"quote": <Quote>,
"工作流步骤": <WorkflowStep>,
"priority": number,
"data": {
"机器翻译": {
"match0": number,
"match100": number,
"match50": number,
"match75": number,
"match85": number,
"match95": number
},
"重复": number,
"翻译记忆库": {
"match0": number,
"match100": number,
"match101": number,
"match50": number,
"match75": number,
"match85": number,
"match95": number
},
"total": number
}
}
工作部分索引
数据聚合
"data": {
"子": {
"类型": "jobPartType"
}
}
父文档
{
"automationWidget": <AutomationWidget>,
"project": <Project>,
"service": <Service>
}
子文档
{
"job": <Job>,
"jobPart": <JobPart>
"data": {
"counts": {
"chars": {
"total": number,
"confirmed": number,
"notConfirmed": number,
"已锁定": number,
"notLocked": number,
"已确认且已锁定": number,
"未确认且已锁定": number,
"完成": number,
"未完成": number
},
"groups": {
"total": number
},
"segments": {
"total": number,
"已确认": number,
"notConfirmed": number,
"已锁定": number,
"notLocked": number,
"已确认且已锁定": number,
"未确认且已锁定": number,
"完成": number,
"未完成": number,
"机器翻译": {
"postEdited": number,
"relevant": number,
"notRelevant": number
},
"QA": {
"checked": number,
"notChecked": number
}
},
"words": {
"total": number,
"confirmed": number,
"notConfirmed": number,
"locked": number,
"notLocked": number,
"已确认且已锁定": number,
"未确认且已锁定": number,
"完成": number,
"未完成": number
},
"QA": {
"warnings": number,
"ignoredWarnings": number,
"notIgnoredWarnings": number
}
}
}
}
- HTTP 请求在 Postman 中运行。
- 通过项目经理或管理员个人资料登录,并使用登录 API 获取登录令牌。
- 向 api/v3/analytics/jobPart?token=<your_login_token> 发送 POST 请求。
工作总数
查询
{
"aggregations": {
"data": {
"子": {
"类型": "jobPartType"
}
}
}
}
响应
{
"hits": {
"total":359
},
"aggregations": {
"data": {
"doc_count":14417
}
}
}
在 aggregations 字段下,data 聚合响应了 14,417 个匹配文档,显示在 doc_count 字段中。
总源词数量
查询
{
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
},
"aggs": {
"wordCount": {
"sum": {
"field": "data.volume.words"
}
}
}
}
}
}
响应
{
"hits": {
"total":359
},
"aggregations": {
"data": {
"doc_count":14417,
"wordCount": {
"value":6893067
}
}
}
}
按译文语言划分的原文词总数
查询
{
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
},
"aggs": {
"byTargetLanguage": {
"terms": {
"field": "job.targetLanguage",
"size":3
},
"aggs": {
"wordCount": {
"sum": {
"field": "data.volume.words"
}
}
}
}
}
}
}
}
响应
{
"hits": {
"total":359
},
"aggregations": {
"data": {
"byTargetLanguage": {
"doc_count_error_upper_bound":0,
"sum_other_doc_count":10071,
"buckets": [
{
"doc_count":1835,
"wordCount": {
"value":702721
},
"key": "cs"
},
{
"doc_count":1491,
"wordCount": {
"value":2602529
},
"key": "de"
},
{
"doc_count":1020,
"wordCount": {
"value":92676
},
"key": "fi"
}
]
},
"doc_count":14417
}
}
}
buckets 实体在响应中显示。当分析模块被要求按某个类别拆分数据时,结果会以一个桶列表的形式展示。每个桶包含一个键,用于定义该桶所代表的数据(在本例中为译文语言),以及一个只针对数据集该部分的值(其他聚合)。
按项目状态拆分的工作总数
查询
{
"aggregations": {
"projectStatus": {
"terms": {
"field": "project.status"
},
"aggs": {
"data": {
"children": {
"type": "jobPartType"
}
}
}
}
}
}
响应
{
"hits": {
"total":359
},
"aggregations": {
"projectStatus": {
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets": [
{
"doc_count":326,
"data": {
"doc_count":14318
},
"key":"NEW"
},
{
"doc_count":31,
"data": {
"doc_count":89
},
"key":"完成"
},
{
"doc_count":1,
"data": {
"doc_count":4
},
"key":"分配"
},
{
"doc_count":1,
"data": {
"doc_count":6
},
"key":"供应商已拒绝"
}
]
}
}
}
分析模块已被告知在数据聚合之前按项目状态拆分数据。
来自 NEW 项目的工作总数
查询
{
"filter": {
"term": {
"project.status":"NEW"
}
},
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
}
}
}
}
响应
{
"hits": {
"total":326
},
"aggregations": {
"data": {
"doc_count":14318
}
}
}
查询中使用了 filter 字段。这在后续聚合之前缩小了数据范围。