MatchCase Score Combiner
A MatchCase query combiner, like other combiners, merges a set of scores into a single output score. Like all other combiners, it can appear as any non-leaf node in a query tree. There is one important, subtle difference between the MatchCase combiner and other combiners. Other combiners output a score that represents a degree of similarity between the query and the record. The MatchCase Score combiner outputs a score that represents the likelihood that the query and the record represent the same entity. Like the similarity score, this is not a probability; it is a relative measure. Also, like the similarity score, it is a value between 0.0 and 1.0.
The distinction between a similarity score and the match likelihood score output by the MatchCase combiner is an important one. Two records might be very similar yet not represent the same entity. On the other hand, two records might have a low similarity score but represent the same entity. In determining whether two records represent the same entity, knowing which portions matched, and to what degree, is critical. By using the MatchCase Score combiner, you can directly express which portions of the query must match, and to what degree. For more information on the MatchCase Score combiner and examples of how it is used, see the "MatchCase Score Combiner" section in the TIBCO Patterns Concepts Guide.
The MatchCase Score combiner is a query expression and accepts the same items in the LPAR_LST_QEXPR list as all other query expressions. The MatchCase Score combiner accepts the following additional LPAR values in the LPAR_LST_QOPTS list. These LPARs are valid only within the LPAR_LST_QOPTS list of a query expression of type LKT_QEXPR_MATCH.
| • | LPAR_DBL_MATCHSTRENGTH This holds the match strength for the MatchCase. This is required for all LKT_QEXPR_MATCH type query expressions. |
| • | LPAR_DBLARR_MATCHTHRESHOLDS This contains the list of threshold values. Entries corresponding to core querylets must be negative. The absolute value of the entry represents the minimum score allowed for the core querylet. Entries corresponding to secondary querylets must be positive. Positive entries are the match/non-match threshold value for the secondary querylet. This is required for all LKT_QEXPR_MATCH type query expressions. |
| • | LPAR_DBLARR_MATCHREWARDS These are the reward weight factors. Values corresponding to core querylets are ignored. This is required for all LKT_QEXPR_MATCH type query expressions. |
| • | LPAR_DBLARR_MATCHPENALTIES These are the penalty weight factors. Values corresponding to core querylets are ignored. This is optional. If this LPAR is not given, penalty factors default to the reward factors. |
The following is the example from the TIBCO® Patterns Concepts Guide rewritten using the “C” API:
/*
 * Category querylets -- one per record category.
 * (How each one is populated is not shown in this example.)
 */
lpar_t name_cat_qry,       /* name */
       street_cat_qry,     /* street address */
       location_cat_qry,   /* location */
       phone_cat_qry,      /* phone number */
       dob_cat_qry ;       /* date of birth */

/* Intermediate pieces used while assembling the match rules. */
lpar_t cat_qrys ;          /* argument list holding the category querylets */
lpar_t rule_1_qry,         /* match case query expression for rule 1 */
       rule_1_qopts ;      /* match case settings (query options) for rule 1 */
lpar_t rule_2_qry,         /* match case query expression for rule 2 */
       rule_2_qopts ;      /* match case settings (query options) for rule 2 */
lpar_t final_query_args ;  /* argument list for the full query */

/* The finished query that this example returns. */
lpar_t final_query ;
/*
 * Rule parameters, hard-coded here for the sake of the example.
 * A real application could load them from a configuration file.
 *
 * Every array has one entry per category querylet.  Core categories carry
 * a querylet weight and a negative threshold; secondary categories carry a
 * positive threshold plus reward and penalty factors.  Entries that do not
 * apply to a category are left at 0.0.
 */

/* ---- Rule 1 ---- */
static double rule_1_weights[]    = {  1.0,   0.80,  0.80, 0.0,  0.0  } ; /* core weights        */
static double rule_1_thresholds[] = { -0.70, -0.80, -0.75, 0.85, 0.60 } ; /* negative == core    */
static double rule_1_rewards[]    = {  0.0,   0.0,   0.0,  0.15, 0.40 } ; /* secondary rewards   */
static double rule_1_penalties[]  = {  0.0,   0.0,   0.0,  0.05, 0.50 } ; /* secondary penalties */

/* ---- Rule 2 ---- */
static double rule_2_weights[]    = {  1.0,  0.0,  0.0,   0.60,  0.90 } ; /* core weights        */
static double rule_2_thresholds[] = { -0.70, 0.85, 0.80, -0.80, -0.50 } ; /* negative == core    */
static double rule_2_rewards[]    = {  0.0,  0.35, 0.35,  0.0,   0.0  } ; /* secondary rewards   */
static double rule_2_penalties[]  = {  0.0,  0.10, 0.10,  0.0,   0.0  } ; /* secondary penalties */
/* Create the query expression arguments list. */
cat_qrys = lpar_create_lst(LPAR_LST_QEXPR_ARGS) ;
lpar_append_lst(cat_qrys, name_cat_qry) ;
lpar_append_lst(cat_qrys, street_cat_qry) ;
lpar_append_lst(cat_qrys, location_cat_qry) ;
lpar_append_lst(cat_qrys, phone_cat_qry) ;
lpar_append_lst(cat_qrys, dob_cat_qry) ;
/* Define Rule 1 Query */
/* ------------------- */
/* Create the query expression that holds rule 1. */
rule_1_qry = lpar_create_lst(LPAR_LST_QEXPR) ;
/* Mark it as a Match Case Query. */
lpar_append_lst(rule_1_qry,
                lpar_create_int(LPAR_INT_QEXPR_TYPE,
                                LKT_QEXPR_MATCH)) ;
/* Add the arguments (category querylets). */
lpar_append_lst(rule_1_qry, cat_qrys) ;
/* Core query weights are added as querylet weights, one per category. */
lpar_append_lst(rule_1_qry,
                lpar_create_dblarr(LPAR_DBLARR_QUERYLETWEIGHTS,
                                   rule_1_weights,
                                   5)) ;
/* All other Match Case parameters go in the Query Options list. */
rule_1_qopts = lpar_create_lst(LPAR_LST_QOPTS) ;
/* Add match strength.  LPAR_DBL_MATCHSTRENGTH is a double-valued LPAR, */
/* so it is created with lpar_create_dbl; an integer LPAR cannot hold   */
/* the fractional value 0.8.                                            */
lpar_append_lst(rule_1_qopts,
                lpar_create_dbl(LPAR_DBL_MATCHSTRENGTH, 0.8)) ;
/* Add thresholds (negative entries mark core querylets). */
lpar_append_lst(rule_1_qopts,
                lpar_create_dblarr(LPAR_DBLARR_MATCHTHRESHOLDS,
                                   rule_1_thresholds,
                                   5)) ;
/* Add rewards */
lpar_append_lst(rule_1_qopts,
                lpar_create_dblarr(LPAR_DBLARR_MATCHREWARDS,
                                   rule_1_rewards,
                                   5)) ;
/* Add penalties (optional; given explicitly here). */
lpar_append_lst(rule_1_qopts,
                lpar_create_dblarr(LPAR_DBLARR_MATCHPENALTIES,
                                   rule_1_penalties,
                                   5)) ;
/* Add Query Options to Rule 1 query. */
lpar_append_lst(rule_1_qry, rule_1_qopts) ;
/* Define Rule 2 Query */
/* ------------------- */
/* Create the query expression that holds rule 2. */
rule_2_qry = lpar_create_lst(LPAR_LST_QEXPR) ;
/* Mark it as a Match Case Query. */
lpar_append_lst(rule_2_qry,
                lpar_create_int(LPAR_INT_QEXPR_TYPE,
                                LKT_QEXPR_MATCH)) ;
/* Add the arguments (category querylets).  A copy is appended because */
/* cat_qrys itself was already added to the rule 1 query.              */
lpar_append_lst(rule_2_qry, lpar_copy(cat_qrys)) ;
/* Core query weights are added as querylet weights, one per category. */
lpar_append_lst(rule_2_qry,
                lpar_create_dblarr(LPAR_DBLARR_QUERYLETWEIGHTS,
                                   rule_2_weights,
                                   5)) ;
/* All other Match Case parameters go in the Query Options list. */
rule_2_qopts = lpar_create_lst(LPAR_LST_QOPTS) ;
/* Add match strength.  LPAR_DBL_MATCHSTRENGTH is a double-valued LPAR, */
/* so it is created with lpar_create_dbl; an integer LPAR cannot hold   */
/* the fractional value 0.85.                                           */
lpar_append_lst(rule_2_qopts,
                lpar_create_dbl(LPAR_DBL_MATCHSTRENGTH, 0.85)) ;
/* Add thresholds (negative entries mark core querylets). */
lpar_append_lst(rule_2_qopts,
                lpar_create_dblarr(LPAR_DBLARR_MATCHTHRESHOLDS,
                                   rule_2_thresholds,
                                   5)) ;
/* Add rewards */
lpar_append_lst(rule_2_qopts,
                lpar_create_dblarr(LPAR_DBLARR_MATCHREWARDS,
                                   rule_2_rewards,
                                   5)) ;
/* Add penalties (optional; given explicitly here). */
lpar_append_lst(rule_2_qopts,
                lpar_create_dblarr(LPAR_DBLARR_MATCHPENALTIES,
                                   rule_2_penalties,
                                   5)) ;
/* Add Query Options to Rule 2 query. */
lpar_append_lst(rule_2_qry, rule_2_qopts) ;
/* Final Query: the OR of the two Match Case queries. */

/* First collect both rule queries, in order, as the OR arguments. */
final_query_args = lpar_create_lst(LPAR_LST_QEXPR_ARGS) ;
lpar_append_lst(final_query_args, rule_1_qry) ;
lpar_append_lst(final_query_args, rule_2_qry) ;

/* Then build the OR expression itself: type first, arguments second. */
final_query = lpar_create_lst(LPAR_LST_QEXPR) ;
lpar_append_lst(final_query,
                lpar_create_int(LPAR_INT_QEXPR_TYPE, LKT_QEXPR_OR)) ;
lpar_append_lst(final_query, final_query_args) ;

/* Hand the completed query back to the caller. */
return final_query ;