JSON Schemas

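In this section:

- Profiling Results JSON Schema
- K-Means Cluster Analysis Results JSON Schema
- Correlation Analysis Results JSON Schema
- DQ Analysis Summary Results JSON Schema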

This appendix provides a reference for the JSON schema documents used by TIBCO DQ for data analysis results.

Profiling Results JSON Schema

{
	"$schema": "https://json-schema.org/draft/2020-12/schema",
	"$id": "https://tibco.com/tdq/schemas/profile",
	"title": "Profile",
	"description": "Profile of the dataset",
	"type": "object",
	"properties": {
		"inputDataSetName": {
			"type": "string",
			"description": "Name of the input data set"
		},
		"createdBy": {
			"type": "string",
			"description": "User account that executed profiling analysis and created the data profile"
		},
		"createdDate": {
			"type": "string",
			"description": "Date and time when the data profile is created"
		},
		"countRows": {
			"type": "integer",
			"description": "Number of rows analyzed"
		},
		"countVariables": {
			"type": "integer",
			"description": "Number of columns analyzed"
		},
		"countObservations": {
			"type": "integer",
			"description": "Number of observations (rows times columns) analyzed"
		},
		"countDuplicateRows": {
			"type": "integer",
			"description": "Number of duplicate rows in the data set"
		},
		"pctDuplicateRows": {
			"type": "number",
			"description": "Percentage of total rows that are duplicate rows in the data set"
		},
		"countDuplicateRowsRemoved": {
			"type": "integer",
			"description": "Number of duplicate rows removed in the deduplication process"
		},
		"pctDuplicateRowsRemoved": {
			"type": "number",
			"description": "Percentage of total rows that were duplicate and were removed in the deduplication process"
		},
		"countRowsAfterDeduplication": {
			"type": "integer",
			"description": "Number of total rows that remain in the data set after the deduplication process"
		},
		"countMissing": {
			"type": "integer",
			"description": "Number of observations that are missing (blank or null) in the data set"
		},
		"pctMissing": {
			"type": "number",
			"description": "Percentage of total observations that are missing (blank or null) in the data set"
		},
		"profileScore": {
			"type": "number",
			"description": "Data profile score for the entire data set"
		},
		"variables": {
			"type": "array",
			"items": {
				"type": "object",
				"properties": {
					"name": {
						"type": "string",
						"description": "Name of the variable"
					},
					"type": {
						"enum": [
							"string",
							"number",
							"date"
						],
						"description": "Data type of the variable"
					},
					"count": {
						"type": "integer",
						"description": "Total number of observations"
					},
					"countDistinct": {
						"type": "integer",
						"description": "Total number of distinct observations, distinct includes null and blank values"
					},
					"countUnique": {
						"type": "integer",
						"description": "Total number of unique observations, unique does not include null or blank values"
					},
					"countNonUnique": {
						"type": "integer",
						"description": "Total number of non-unique observations"
					},
					"countNulls": {
						"type": "integer",
						"description": "Total number of null observations"
					},
					"countNotNulls": {
						"type": "integer",
						"description": "Total number of non-null observations"
					},
					"countBlanks": {
						"type": "integer",
						"description": "Total number of blank observations"
					},
					"pctUnique": {
						"type": "number",
						"description": "Percentage of total observations that are unique"
					},
					"pctComplete": {
						"type": "number",
						"description": "Percentage of total observations that are not null or blank"
					},
					"sensitive": {
						"type": "boolean",
						"description": "true if the variable contains sensitive data"
					},
					"frequency": {
						"type": "array",
						"description": "Frequency of values",
						"items": {
							"type": "object",
							"properties": {
								"value": {
									"type": "string",
									"description": "Observation value"
								},
								"count": {
									"type": "integer",
									"description": "Number of observations that has this value"
								},
								"pct": {
									"type": "number",
									"description": "Percentage of total observations that has this value"
								}
							}
						}
					},
					"lengths": {
						"type": "object",
						"description": "Character length of the observation values",
						"properties": {
							"min": {
								"type": "integer",
								"description": "Minimum length of all the observation values"
							},
							"max": {
								"type": "integer",
								"description": "Maximum length of all the observation values"
							},
							"median": {
								"type": "number",
								"description": "Median length of all the observation values"
							},
							"avg": {
								"type": "number",
								"description": "Average length of all the observation values"
							}
						}
					},
					"patterns": {
						"type": "array",
						"items": {
							"type": "object",
							"description": "Patterns discovered from the observation values",
							"properties": {
								"value": {
									"type": "string",
									"description": "Pattern value"
								},
								"count": {
									"type": "integer",
									"description": "Total number of observation values that have this pattern"
								},
								"pct": {
									"type": "number",
									"description": "Percentage of total observation values that have this pattern"
								}
							}
						}
					},
					"masks": {
						"type": "array",
						"items": {
							"type": "object",
							"description": "Masks discovered from the observation values",
							"properties": {
								"value": {
									"type": "string",
									"description": "Mask value"
								},
								"count": {
									"type": "integer",
									"description": "Total number of observation values that have this mask"
								},
								"pct": {
									"type": "number",
									"description": "Percentage of total observation values that have this mask"
								}
							}
						}
					},
					"contentTypes": {
						"type": "array",
						"description": "Data classes discovered from the observation values",
						"items": {
							"type": "object",
							"properties": {
								"value": {
									"type": "string",
									"description": "Name of the data class"
								},
								"count": {
									"type": "integer",
									"description": "Total number of observation values that belong to this data class"
								},
								"pct": {
									"type": "number",
									"description": "Percentage of total observation values that belong to this data class"
								}
							}
						}
					},
					"numeric": {
						"type": "array",
						"description": "Numerical statistics of the observation values",
						"items": {
							"type": "object",
							"properties": {
								"variable": {
									"type": "string",
									"description": "Name of the variable"
								},
								"avg": {
									"type": "number",
									"description": "Average value"
								},
								"min": {
									"type": "number",
									"description": "Minimum value"
								},
								"max": {
									"type": "number",
									"description": "Maximum value"
								},
								"stdDev": {
									"type": "number",
									"description": "Standard deviation"
								},
								"pctl25": {
									"type": "number",
									"description": "25th percentile"
								},
								"pctl50": {
									"type": "number",
									"description": "50th percentile"
								},
								"pctl75": {
									"type": "number",
									"description": "75th percentile"
								}
							}
						}
					},
					"profileScore": {
						"type": "number",
						"description": "Profile score for the individual variable"
					}
				}
			}
		},
		"notNullsSvg": {
			"type": "string",
			"description": "XML representing the completeness chart of the data set"
		},
		"variableOptions": {
			"type": "array",
			"items": {
				"type": "object",
				"description": "Data expectations set by the user",
				"properties": {
					"id": {
						"type": "string",
						"description": "Name of the variable"
					},
					"businessImpact": {
						"type": "string",
						"description": "A number that represents the impact (HIGH, MEDIUM or LOW) of the variable on business outcomes"
					},
					"allowsNulls": {
						"type": "string",
						"description": "True if null values are expected for this variable"
					},
					"shouldBeUnique": {
						"type": "string",
						"description": "True if only unique values are expected for this variable"
					}
				}
			}
		}
	}
}

K-Means Cluster Analysis Results JSON Schema

{
	"$schema": "https://json-schema.org/draft/2020-12/schema",
	"$id": "https://tibco.com/tdq/schemas/cluster_analysis/kmeans",
	"title": "KMeans Cluster Analysis",
	"description": "KMeans cluster analysis of two variables in a given data set",
	"type": "object",
	"properties": {
		"method": {
			"type": "string",
			"const": "kmeans",
			"description": "Cluster analysis method"
		},
		"count": {
			"type": "integer",
			"description": "Number of rows analyzed"
		},
		"variables": {
			"type": "array",
			"description": "Name of the variables",
			"minItems": 2,
			"maxItems": 2,
			"uniqueItems": true,
			"items": {
				"type": "string"
			}
		},
		"svg": {
			"type": "string",
			"description": "Cluster analysis SVG graph"
		}
	}
}

Correlation Analysis Results JSON Schema

{
	"$schema": "https://json-schema.org/draft/2020-12/schema",
	"$id": "https://tibco.com/tdq/schemas/correlations",
	"title": "Correlation Analysis",
	"description": "Correlation analysis of variables in a dataset",
	"type": "array",
	"items": {
		"type": "object",
		"properties": {
			"method": {
				"type": "string",
				"enum": [
					"kendall",
					"pearson",
					"spearman"
				],
				"description": "Correlation analysis method"
			},
			"count": {
				"type": "integer",
				"description": "Number of rows analyzed"
			},
			"variables": {
				"type": "array",
				"description": "Names of the variables",
				"uniqueItems": true,
				"items": {
					"type": "string"
				}
			},
			"coefficients": {
				"type": "array",
				"description": "Correlation coefficients for a pair of variables",
				"items": {
					"type": "array",
					"items": {
						"type": "number",
						"minimum": -1.0,
						"maximum": 1.0
					}
				}
			},
			"svg": {
				"type": "string",
				"description": "Correlation analysis SVG graph"
			}
		}
	}
}

DQ Analysis Summary Results JSON Schema

{
	"$schema": "https://json-schema.org/draft/2020-12/schema",
	"$id": "urn:jsonschema:com:tibco:tdq:common:model:butler:CleansingResults",
	"type": "object",
	"description": "Summarized results of Rules driven data quality analysis",
	"properties": {
		"inputFile": {
			"type": "string",
			"description": "File name and location of the input data set"
		},
		"outputFile": {
			"type": "string",
			"description": "File name and location of the output data set"
		},
		"dqScore": {
			"type": "integer",
			"description": "Data quality score for the data set"
		},
		"results": {
			"type": "array",
			"description": "Results for each DQ Rule executed against a variable or group of variables",
			"items": {
				"type": "object",
				"$id": "urn:jsonschema:com:tibco:tdq:common:model:butler:Summary",
				"properties": {
					"fileName": {
						"type": "string",
						"description": "File name and location of the cleansed output generated for this combination of Variable or Group of Variables and DQ Rule"
					},
					"ruleId": {
						"type": "string",
						"description": "Unique identifier of the DQ Rule"
					},
					"groupName": {
						"type": "string",
						"description": "Name of the Variable or Group of Variables"
					},
					"dqScore": {
						"type": "integer",
						"description": "Data quality score for this combination of Variable or Group of Variables and DQ Rule"
					},
					"tags": {
						"type": "object",
						"additionalProperties": {
							"type": "integer",
							"description": "Tag values generated during the DQ analysis"
						}
					},
					"countCleansed": {
						"type": "integer",
						"description": "Total number of cleansed values"
					},
					"countMissing": {
						"type": "integer",
						"description": "Total number of missing values"
					},
					"countProcessed": {
						"type": "integer",
						"description": "Total number of processed values"
					},
					"countInvalid": {
						"type": "integer",
						"description": "Total number of invalid values"
					},
					"countValid": {
						"type": "integer",
						"description": "Total number of valid values"
					}
				}
			}
		},
		"businessContext": {
			"type": "object",
			"$id": "urn:jsonschema:com:tibco:tdq:common:model:dataset:DataSetContext",
			"properties": {
				"id": {
					"type": "string",
					"description": "Name of the variable"
				},
				"businessImpact": {
					"type": "string",
					"description": "A number that represents the impact (HIGH, MEDIUM or LOW) of the variable on business outcomes"
				},
				"allowsNulls": {
					"type": "string",
					"description": "True if null values are expected for this variable"
				},
				"shouldBeUnique": {
					"type": "string",
					"description": "True if only unique values are expected for this variable"
				}
			}
		}
	}
}