运行聚合查询的数据存储建立在Elasticsearch之上(当前版本1.7.x)。Elasticsearch提供实时搜索和聚合能力,Analytics API在其上形成了一层薄薄的、安全增强的层。示例中使用了Elasticsearch查询和聚合语言。
-
查询 JSON 的 filter 字段所使用的语言在 Elasticsearch Query DSL 文档中有详细说明。
-
查询 JSON 的 aggregations 字段所使用的语言在 Elasticsearch 聚合(Aggregations)文档中有详细说明。
索引
不同类型的数据存储在不同的索引中。例如,工作数据存储在 jobPart 索引中,而成本数据存储在成本索引中。可以通过在聚合 API 端点 URL 中指定索引来选择所需的数据。
父子文档
关于工作的部分信息存储在 Elasticsearch 数据库的两个不同文档中,这两个文档构成父子关系。例如,工作所属项目的信息存储在父文档中,而工作本身的信息存储在子文档中。根据所需的信息,其他聚合可以放在 data 父子聚合之前或之后,但 data 聚合本身应出现在所有查询中。
常见数据
分析(Analysis)
{
"id": string,
"createdBy": <User>,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"innerId": number,
"name": string,
"tags": string[],
"trashed": boolean,
"trashedBy": <User>,
"type": string
}
分配(Assignment)
{
"name": string,
"linguist": <User>,
"vendor": <Vendor>
}
自动化小组件(AutomationWidget)
{
"id": string,
"createdBy": <User>,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean,
"urlId": string
}
买方(Buyer)
{
"id": string,
"name": string
}
客户(Client)
{
"id": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"externalId": string,
"name": string,
"tags": string[],
"trashed": boolean,
}
CostCenter
{
"id": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean,
}
领域(Domain)
{
"id": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean
}
工作(Job)
{
"id": string,
"createdBy": <User>,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"extension": string,
"fileName": string,
"groupCount": number,
"innerId": "1",
"languagePair": string,
"lastModified": Date,
"localePair": string,
"sourceLanguage": string,
"sourceLocale": string,
"tags": string[],
"targetLanguage": string,
"targetLocale": string",
"taskId": string,
"trashed": boolean,
"trashedBy": <User>,
"uid": string
}
JobPart
{
"id": "19",
"assignedTo": <Assignment>,
"beginIndex": number,
"buyer": <Buyer>,
"createdBy": <User>,
"dateCreated": Date,
"dateDue": Date,
"endIndex": 数字,
"groupCount": number,
"innerId": string,
"lastModified": Date,
"level": number,
"status": string,
"tags": [],
"uid": string,
"workflowStep": <WorkflowStep>
}
MtEngine
{
"id": string,
"deleted": boolean,
"default_: 布尔值,
"includeTags": boolean,
"name": string,
"tags": string[],
"type": string,
}
NetRateScheme
{
"id": string,
"createdBy": <User>,
"dateCreated": Date,
"dateDeleted": Date,
"default_": boolean,
"deleted": boolean,
"externalId": string,
"name": string,
"tags": string[],
}
PriceList
{
"id": string,
"createdBy": <User>,
"currency": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"default_": boolean,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean,
"unit": string
}
项目(Project)
{
"id": string,
"buyer": <Buyer>,
"client": <Client>,
"costCenter": <CostCenter>,
"createdBy": <User>,
"dateCreated": Date,
"dateDeleted": Date,
"dateDue": Date,
"dateTrashed": Date,
"deleted": boolean,
"domain": <Domain>,
"innerId": number,
"languagePairs": string[],
"localePairs": string[],
"mtEngine": <MtEngine>,
"name": string,
"note": string,
"owner": <User>,
"sourceLanguage": string,
"sourceLocale": string,
"status": string,
"subDomain": <SubDomain>,
"tags": string[],
"targetLanguages": string[],
"targetLocales": string[],
"trashed": boolean,
"trashedBy": <User>,
"uid": string,
"vendor": <Vendor>
}
报价(Quote)
{
"id": string,
"createdBy: <User>,
"currency": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"innerId": string,
"name": string,
"status": string,
"tags": string[],
"trashed": boolean,
"trashedBy": <User>,
"unit": string
}
服务(Service)
{
"id": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"name": string,
"publicName": string,
"tags": string[],
"trashed": boolean,
"type": string
}
SubDomain
{
"id": string,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"name": string,
"tags": string[],
"trashed": boolean
}
用户(User)
{
"id": number,
"active": boolean,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"email": string,
"firstName": string,
"fullName": string,
"innerId": number,
"jobTitle": string,
"lastName": string,
"locale": string,
"note": string,
"role": string,
"tags": string[],
"timeZone": string,
"trashed": boolean,
"userName": string
}
供应商(Vendor)
{
"id": string,
"candidate": boolean,
"dateCreated": Date,
"dateDeleted": Date,
"dateTrashed": Date,
"deleted": boolean,
"tags": string[],
"token": string,
"trashed": boolean
}
工作流步骤(WorkflowStep)
{
"id": string,
"abbreviation": string,
"dateDeleted": Date,
"dateTrashed": Date,
"name": string,
"order": number,
"tags": string[],
"trashed": boolean
}
分析索引
数据聚合
"data": {
"children": {
"type": "analysisType"
}
}
父文档
{
"automationWidget": <AutomationWidget>,
"project": <Project>,
"service": <Service>
}
子文档
{
"analysis": <Analysis>,
"jobPart": <JobPart>,
"job": <Job>,
"netRateScheme": <NetRateScheme>,
"priority": number,
"data": {
"mt": {
"match0": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match100": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match50": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match75": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match85": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match95": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
}
},
"repetitions": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"tm": {
"match0": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match100": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match101": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match50": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match75": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match85": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
},
"match95": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
}
},
"total": {
"characters": number,
"pages": number,
"percent": number,
"segments": number,
"单词": 数字
}
}
}
API索引
数据聚合
"data": {
"children": {
"type": "apiType"
}
}
父文档
{
"user": <User>
}
子文档
{
"request": {
"date": Date,
"host": string,
"ipv4": string,
"ipv6": string,
"location": string,
"method": string
},
"response": {
"date": Date,
"duration": number,
"status": number
},
"api": {
"action": string,
"asynch": boolean,
"type": string,
"uri": string,
"ver": string
}
}
成本索引
数据聚合
"data": {
"children": {
"type": "costsType"
}
}
父文档
{
"automationWidget": <AutomationWidget>,
"project": <Project>,
"service": <Service>
}
子文档
{
"analysis": <Analysis>,
"job": <Job>,
"jobPart": <JobPart>,
"netRateScheme": <NetRateScheme>,
"priceList": <PriceList>,
"quote": <Quote>,
"workflowStep": <WorkflowStep>,
"priority": number,
"data": {
"mt": {
"match0": number,
"match100": number,
"匹配50": 数字,
"匹配75": 数字,
"匹配85": 数字,
"匹配95": 数字
},
"重复次数": 数字,
"tm": {
"match0": number,
"match100": number,
"match101": number,
"匹配50": 数字,
"匹配75": 数字,
"匹配85": 数字,
"匹配95": 数字
},
"total": number
}
}
jobPart 索引
数据聚合
"data": {
"children": {
"type": "jobPartType"
}
}
父文档
{
"automationWidget": <AutomationWidget>,
"project": <Project>,
"service": <Service>
}
子文档
{
"job": <Job>,
"jobPart": <JobPart>
"data": {
"counts": {
"chars": {
"total": number,
"confirmed": number,
"notConfirmed": number,
"locked": number,
"notLocked": number,
"confirmedAndLocked": number,
"notConfirmedAndLocked": number,
"completed": number,
"notCompleted": number
},
"groups": {
"total": number
},
"segments": {
"total": number,
"confirmed": number,
"notConfirmed": number,
"locked": number,
"notLocked": number,
"confirmedAndLocked": number,
"notConfirmedAndLocked": number,
"completed": number,
"notCompleted": number,
"mt": {
"postEdited": number,
"relevant": number,
"notRelevant": number
},
"qa": {
"checked": number,
"notChecked": number
}
},
"words": {
"total": number,
"confirmed": number,
"notConfirmed": number,
"locked": number,
"notLocked": number,
"confirmedAndLocked": number,
"notConfirmedAndLocked": number,
"completed": number,
"notCompleted": number
},
"qa": {
"警告": 数字,
"ignoredWarnings": number,
"notIgnoredWarnings": number
}
}
}
}
-
HTTP 请求在 Postman 中运行。
-
通过项目经理或管理员个人资料登录,并使用登录 API 获取登录令牌。
-
POST 请求发送至
api/v3/analytics/jobPart?token=<your_login_token>
作业总数
查询
{
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
}
}
}
}
响应
{
"hits": {
"total": 359
},
"aggregations": {
"data": {
"doc_count": 14417
}
}
}
在 aggregations 字段下,data 聚合返回了 14,417 个匹配文档,该数量显示在 doc_count 字段中。
源词总数
查询
{
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
},
"aggs": {
"wordCount": {
"sum": {
"field": "data.volume.words"
}
}
}
}
}
}
响应
{
"hits": {
"total": 359
},
"aggregations": {
"data": {
"doc_count": 14417,
"wordCount": {
"value": 6893067
}
}
}
}
按目标语言划分的源词总数
查询
{
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
},
"aggs": {
"byTargetLanguage": {
"terms": {
"field": "job.targetLanguage",
"size": 3
},
"aggs": {
"wordCount": {
"sum": {
"field": "data.volume.words"
}
}
}
}
}
}
}
}
响应
{
"hits": {
"total": 359
},
"aggregations": {
"data": {
"byTargetLanguage": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 10071,
"buckets": [
{
"doc_count": 1835,
"wordCount": {
"value": 702721
},
"key": "cs"
},
{
"doc_count": 1491,
"wordCount": {
"value": 2602529
},
"key": "de"
},
{
"doc_count": 1020,
"wordCount": {
"value": 92676
},
"key": "fi"
}
]
},
"doc_count": 14417
}
}
}
响应中出现了 buckets 实体。当分析模块被要求按某个类别拆分数据时,结果会以桶(bucket)列表的形式表示。每个桶包含一个键(key),用于标识该桶所代表的数据(此处为目标语言),以及针对该部分数据计算出的值(其他聚合的结果)。
按项目状态划分的工作总数
查询
{
"aggregations": {
"projectStatus": {
"terms": {
"field": "project.status"
},
"aggs": {
"data": {
"children": {
"type": "jobPartType"
}
}
}
}
}
}
响应
{
"hits": {
"total": 359
},
"aggregations": {
"projectStatus": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"doc_count": 326,
"data": {
"doc_count": 14318
},
"key": "NEW"
},
{
"doc_count": 31,
"data": {
"doc_count": 89
},
"key": "COMPLETED"
},
{
"doc_count": 1,
"data": {
"doc_count": 4
},
"key": "ASSIGNED"
},
{
"doc_count": 1,
"data": {
"doc_count": 6
},
"key": "DECLINED_BY_VENDOR"
}
]
}
}
}
分析模块被要求在执行 data 聚合之前先按项目状态拆分数据。
来自新项目的工作总数
查询
{
"filter": {
"term": {
"project.status": "NEW"
}
},
"aggregations": {
"data": {
"children": {
"type": "jobPartType"
}
}
}
}
响应
{
"hits": {
"total": 326
},
"aggregations": {
"data": {
"doc_count": 14318
}
}
}
查询中使用了 filter 字段,它会在后续聚合之前先缩小数据范围。