First, create a file named ci.definitions.1.0.cdm.json with the following content, then upload it to the correct root of the storage container you're using:
{
"jsonSchemaSemanticVersion": "1.0.0",
"imports": [
{
"corpusPath": "cdm:/foundations.cdm.json"
},
{
"corpusPath": "cdm:/primitives.cdm.json"
},
{
"corpusPath": "cdm:/meanings.concepts.cdm.json"
},
{
"corpusPath": "cdm:/meanings.measurement.cdm.json"
}
],
"definitions": [
{
"traitName": "is.CI.partition.incremental",
"extendsTrait": "is",
"hasParameters": [
{
"name": "regularExpression",
"dataType": "string",
"explanation": "The regular expression to use for the incremental partition.",
"required": true
},
{
"name": "rootLocation",
"dataType": "string",
"explanation": "The root location to use for discovering the partitions. If not specified, then we default to the rootLocation of the first data partition pattern",
"required": false
},
{
"name": "parameters",
"dataType": "list",
"explanation": "Parameters for the regex capture i.e capture groups.",
"required": false
}
]
},
{
"traitName": "is.CI.partition.incremental.upsert",
"extendsTrait": "is.CI.partition.incremental",
"hasParameters": []
},
{
"traitName": "is.CI.partition.incremental.delete",
"extendsTrait": "is.CI.partition.incremental",
"hasParameters": []
},
{
"traitName": "is.formatted",
"extendsTrait": "is",
"explanation": "a root for traits that describe how data is formatted"
},
{
"traitName": "means.reference.culture",
"extendsTrait": "means.reference"
},
{
"traitName": "means.reference.culture.tag",
"extendsTrait": "means.reference.culture"
},
{
"dataTypeName": "cultureTag",
"extendsDataType": "languageTag",
"explanation": "a BCP 47 language tag",
"exhibitsTraits": [
"means.reference.culture.tag"
]
},
{
"traitName": "is.formatted.forCulture",
"extendsTrait": "is.formatted",
"explanation": "values are stored using the specified culture",
"hasParameters": [
{
"name": "culture",
"dataType": "cultureTag",
"required": true,
"explanation": "a IETF BCP 47 language tag"
}
]
},
{
"traitName": "means.measurement.currencyCode",
"extendsTrait": "means.measurement",
"explanation": "indicates this value represents an ISO 4217 currency code"
},
{
"dataTypeName": "currencyCode",
"extendsDataType": "stringFormat",
"explanation": "value is a ISO 4217 currency code",
"exhibitsTraits": [
"means.measurement.currencyCode"
]
},
{
"traitName": "is.inCurrency",
"extendsTrait": "is",
"explanation": "the data represents an amount of the specified currency",
"hasParameters": [
{
"name": "code",
"dataType": "currencyCode",
"required": true,
"explanation": "ISO 4217 currency code"
}
]
},
{
"traitName": "means.formatting.stringFormat",
"extendsTrait": "means.formatting",
"explanation": "indicates this value represents the format of a string"
},
{
"dataTypeName": "stringFormat",
"extendsDataType": "string",
"explanation": "a string representing the format used to encode data in another string",
"exhibitsTraits": [
"means.formatting.stringFormat"
]
},
{
"traitName": "is.formatted.text",
"extendsTrait": "is.formatted",
"explanation": "string data is formatted according to the format parameter",
"hasParameters": [
{
"name": "format",
"dataType": "stringFormat",
"required": true,
"explanation": "String indicating the format of the data"
}
]
},
{
"traitName": "is.formatted.dateTime",
"extendsTrait": "is.formatted",
"explanation": "dateTime data formatted as a string in ISO 8601 format",
"hasParameters": [
{
"name": "format",
"dataType": "stringFormat",
"defaultValue": "YYYY-MM-DDThh:mmZ"
}
]
},
{
"traitName": "is.formatted.date",
"extendsTrait": "is.formatted",
"explanation": "date data formatted as a string in ISO 8601 format",
"hasParameters": [
{
"name": "format",
"dataType": "stringFormat",
"defaultValue": "YYYY-MM-DD"
}
]
},
{
"traitName": "is.formatted.time",
"extendsTrait": "is.formatted",
"explanation": "time data formatted as a string in ISO 8601 format",
"hasParameters": [
{
"name": "format",
"dataType": "stringFormat",
"defaultValue": "hh:mm:ss"
}
]
},
{
"traitName": "is.inTimeZone",
"extendsTrait": "is",
"explanation": "the associated data is assumed to be in the specified time zone",
"hasParameters": [
{
"name": "timeZoneName",
"dataType": "timezone",
"required": true,
"explanation": "the name of a time zone"
},
{
"name": "format",
"dataType": "stringFormat",
"required": true,
"explanation": "the time zone naming scheme used for the timeZoneName parameter"
}
]
},
{
"traitName": "is.inTimeZone.MicrosoftFormat",
"extendsTrait": {
"traitReference": "is.inTimeZone",
"arguments": [
{
"name": "format",
"value": "MicrosoftFormat"
}
]
},
"explanation": "the associated data is assumed to be in the specified time zone. timeZoneName value is a Microsoft standard time zone name. see support.microsoft.com/.../973627"
},
{
"traitName": "is.inTimeZone.tzDatabaseFormat",
"extendsTrait": {
"traitReference": "is.inTimeZone",
"arguments": [
{
"name": "format",
"value": "tzDatabaseFormat"
}
]
},
"explanation": "the associated data is assumed to be in the specified time zone. timeZoneName value is a Time Zone Database standard time zone name. see www.iana.org/time-zones"
}
]
}
Then create the cdp.manifest.cdm.json file. It's the one I showed above, but here is an example with initial file names that reference actual Parquet files:
{
"manifestName": "YOUR CUSTOMER SANBOX or PRODUCTION",
"entities": [
{
"type": "LocalEntity",
"entityName": "CustomerAlternate",
"entityPath": "CustomerAlternate.cdm.json/CustomerAlternate",
"dataPartitions": [
{
"location": "/Customer/CICustomer.parquet",
"exhibitsTraits": [
{
"traitReference": "is.partition.format.parquet"
}
]
}
]
},
{
"type": "LocalEntity",
"entityName": "Order",
"entityPath": "Order.cdm.json/Order",
"dataPartitions": [
{
"location": "/Activities/Order/CIOrders.parquet",
"exhibitsTraits": [
{
"traitReference": "is.partition.format.parquet"
}
]
}
]
},
{
"type": "LocalEntity",
"entityName": "Customer",
"entityPath": "Customer.cdm.json/Customer",
"dataPartitions": [
{
"location": "/Customer/CICustomer.parquet",
"exhibitsTraits": [
{
"traitReference": "is.partition.format.parquet"
}
]
}
]
},
{
"type": "LocalEntity",
"entityName": "OrderItem",
"entityPath": "OrderItem.cdm.json/OrderItem",
"dataPartitions": [
{
"location": "/Activities/OrderItem/CIOrderItem.parquet",
"exhibitsTraits": [
{
"traitReference": "is.partition.format.parquet"
}
]
}
]
}
],
"jsonSchemaSemanticVersion": "1.0.0",
"imports": [
{
"corpusPath": "ci.definitions.cdm.json"
},
{
"corpusPath": "/CustomerInsightsDefinitions/ci.definitions.1.0.cdm.json",
"moniker": "[CI Auto Import] Customer Insights definitions"
}
]
}
Just make sure the folders referenced above actually exist in your container. There are a lot of nuances and a lot of gotchas, but if it saves, then you're good to go. You should be able to edit and create entities after this. Obviously, adjust the file references and cut the manifest down to just one entity to begin with - the one for the main Parquet file you mentioned. These are both from live instances with just the container names changed. As long as CI can access the lake, you should be good. Let me know if you have any problems.