Create New Datasets
Create new datasets, configure data sources, and set up column mappings for process mining analysis.
Create New Dataset
POST /api/{tenantId}/{projectId}/dataset
Creates a new dataset container with metadata and column definitions. This establishes the structure for subsequent data import operations.
Parameters
| Parameter | Type | Location | Description |
|---|---|---|---|
| tenantId | GUID | Path | The tenant identifier |
| projectId | GUID | Path | The project identifier |
Request Body
{
"datasetName": "Purchase Order Process Events",
"description": "Event log data from the procurement system",
"datasetType": "EventLog",
"columns": [
{
"name": "CaseID",
"type": "string",
"role": "case_id",
"required": true,
"description": "Unique identifier for each purchase order"
},
{
"name": "Activity",
"type": "string",
"role": "activity",
"required": true,
"description": "Process activity name"
},
{
"name": "Timestamp",
"type": "datetime",
"role": "timestamp",
"required": true,
"format": "ISO8601",
"description": "When the activity occurred"
},
{
"name": "Resource",
"type": "string",
"role": "resource",
"required": false,
"description": "Person or system performing the activity"
},
{
"name": "Amount",
"type": "decimal",
"role": "attribute",
"required": false,
"description": "Purchase order amount"
}
],
"metadata": {
"source": "SAP ERP System",
"extractionDate": "2024-01-15T10:30:00Z",
"dataOwner": "Procurement Team"
}
}
Response
{
"datasetId": "550e8400-e29b-41d4-a716-446655440000",
"datasetName": "Purchase Order Process Events",
"description": "Event log data from the procurement system",
"datasetType": "EventLog",
"status": "Created",
"columns": [
{
"columnId": "col-001",
"name": "CaseID",
"type": "string",
"role": "case_id",
"required": true,
"description": "Unique identifier for each purchase order"
}
],
"rowCount": 0,
"caseCount": 0,
"activityCount": 0,
"dateCreated": "2024-01-15T10:30:00Z",
"createdBy": "user123"
}
Create Dataset from Template
POST /api/{tenantId}/{projectId}/dataset/template/{templateId}
Creates a new dataset using a predefined template with standard column mappings for common process types.
Request Body
{
"datasetName": "Invoice Processing Events",
"description": "Accounts payable process event log",
"customizations": {
"additionalColumns": [
{
"name": "InvoiceAmount",
"type": "decimal",
"role": "attribute",
"description": "Invoice total amount"
}
],
"metadata": {
"source": "Finance System",
"department": "Accounts Payable"
}
}
}
Response
Returns the created dataset object with template columns and any customizations applied.
List Dataset Templates
GET /api/{tenantId}/{projectId}/dataset/templates
Retrieves available dataset templates with predefined column structures for common business processes.
Response
{
"templates": [
{
"templateId": "template-001",
"name": "Standard Event Log",
"description": "Basic event log with case, activity, and timestamp",
"category": "General",
"columns": [
{
"name": "CaseID",
"type": "string",
"role": "case_id",
"required": true
},
{
"name": "Activity",
"type": "string",
"role": "activity",
"required": true
},
{
"name": "Timestamp",
"type": "datetime",
"role": "timestamp",
"required": true
}
]
},
{
"templateId": "template-002",
"name": "Purchase-to-Pay Process",
"description": "Standard P2P process with supplier and cost data",
"category": "Finance",
"columns": [
{
"name": "PurchaseOrderID",
"type": "string",
"role": "case_id",
"required": true
},
{
"name": "ActivityName",
"type": "string",
"role": "activity",
"required": true
},
{
"name": "EventTime",
"type": "datetime",
"role": "timestamp",
"required": true
},
{
"name": "Supplier",
"type": "string",
"role": "attribute",
"required": false
},
{
"name": "Amount",
"type": "decimal",
"role": "attribute",
"required": false
}
]
}
]
}
Column Types and Roles
Understanding column types and roles for proper dataset structure:
Required Roles
Essential columns for process mining analysis.
- case_id: Unique process instance identifier
- activity: Name of the process step
- timestamp: When the activity occurred
Optional Roles
Additional data for enhanced analysis.
- resource: Person or system performing activity
- attribute: Case or event attributes
- lifecycle: Activity lifecycle stage
Data Types
Supported column data types.
- string: Text data
- decimal: Numeric values
- datetime: Date and time values
- boolean: True/false values
Validation Rules
Data quality and validation options.
- required: Non-null validation
- format: Data format validation
- constraints: Value range limits
JavaScript Example: Dataset Creation
// Thin client for the dataset REST endpoints of a mindzie instance.
// Public interface: createDataset, createFromTemplate, getTemplates,
// buildStandardColumns.
class DatasetCreator {
constructor(baseUrl, tenantId, projectId, token) {
this.baseUrl = baseUrl;
this.tenantId = tenantId;
this.projectId = projectId;
this.headers = {
'Authorization': `Bearer ${token}`,
'Content-Type': 'application/json'
};
}
// Root URL shared by every dataset endpoint for this tenant/project.
_datasetUrl() {
return `${this.baseUrl}/api/${this.tenantId}/${this.projectId}/dataset`;
}
// POST a JSON payload and resolve with the parsed response body.
async _postJson(url, payload) {
const response = await fetch(url, {
method: 'POST',
headers: this.headers,
body: JSON.stringify(payload)
});
return await response.json();
}
// Create a dataset from a full configuration object.
async createDataset(datasetConfig) {
return await this._postJson(this._datasetUrl(), datasetConfig);
}
// Create a dataset from a predefined template, with optional customizations.
async createFromTemplate(templateId, config) {
return await this._postJson(`${this._datasetUrl()}/template/${templateId}`, config);
}
// Fetch the list of available dataset templates.
async getTemplates() {
const response = await fetch(`${this._datasetUrl()}/templates`, {
headers: this.headers
});
return await response.json();
}
// The four standard process-mining columns, in the order the API
// examples use them: case id, activity, timestamp, resource.
buildStandardColumns() {
const column = (name, type, role, required, description) =>
({ name, type, role, required, description });
// Timestamp carries an extra `format` field, so it stays a literal
// to preserve the original property order.
const timestampColumn = {
name: 'Timestamp',
type: 'datetime',
role: 'timestamp',
required: true,
format: 'ISO8601',
description: 'Activity occurrence time'
};
return [
column('CaseID', 'string', 'case_id', true, 'Unique process instance identifier'),
column('Activity', 'string', 'activity', true, 'Process activity name'),
timestampColumn,
column('Resource', 'string', 'resource', false, 'Performing resource')
];
}
}
// Usage example
const client = new DatasetCreator(
'https://your-mindzie-instance.com',
'tenant-guid',
'project-guid',
'your-token'
);
// Build a custom dataset definition: the four standard columns plus one
// order-specific attribute column.
const orderValueColumn = {
name: 'OrderValue',
type: 'decimal',
role: 'attribute',
required: false,
description: 'Order total amount'
};
const datasetConfig = {
datasetName: 'Order Fulfillment Process',
description: 'E-commerce order processing events',
datasetType: 'EventLog',
columns: [...client.buildStandardColumns(), orderValueColumn],
metadata: {
source: 'E-commerce Platform',
extractionDate: new Date().toISOString()
}
};
client.createDataset(datasetConfig)
.then(result => {
console.log(`Created dataset: ${result.datasetId}`);
console.log(`Name: ${result.datasetName}`);
});
Python Example: Dataset Creation
import requests
import json
from datetime import datetime
class DatasetCreator:
    """Client for creating datasets in a mindzie project via the REST API.

    All requests are sent with a bearer token and a JSON content type.
    """

    def __init__(self, base_url, tenant_id, project_id, token, timeout=30):
        """Initialize the client.

        Args:
            base_url: Base URL of the mindzie instance (no trailing slash).
            tenant_id: Tenant GUID used in the URL path.
            project_id: Project GUID used in the URL path.
            token: Bearer token placed in the Authorization header.
            timeout: Per-request timeout in seconds (default 30). Without a
                timeout, ``requests`` can block indefinitely on a stalled
                server.
        """
        self.base_url = base_url
        self.tenant_id = tenant_id
        self.project_id = project_id
        self.timeout = timeout
        self.headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        }

    def create_dataset(self, name, description, columns, metadata=None):
        """Create a new dataset with custom configuration.

        Args:
            name: Dataset display name.
            description: Human-readable dataset description.
            columns: List of column definition dicts (name/type/role/...).
            metadata: Optional dict of free-form dataset metadata.

        Returns:
            Parsed JSON body of the created-dataset response.

        Raises:
            requests.HTTPError: If the server responds with a 4xx/5xx status.
        """
        url = f"{self.base_url}/api/{self.tenant_id}/{self.project_id}/dataset"
        payload = {
            'datasetName': name,
            'description': description,
            'datasetType': 'EventLog',
            'columns': columns,
            'metadata': metadata or {}
        }
        response = requests.post(url, json=payload, headers=self.headers,
                                 timeout=self.timeout)
        # Fail fast on error statuses instead of trying to parse an error
        # page as JSON (which would raise a confusing decode error).
        response.raise_for_status()
        return response.json()

    def create_from_template(self, template_id, name, description, customizations=None):
        """Create a dataset from a predefined template.

        Args:
            template_id: Identifier of the template to instantiate.
            name: Dataset display name.
            description: Human-readable dataset description.
            customizations: Optional dict of template customizations
                (e.g. additionalColumns, metadata).

        Returns:
            Parsed JSON body of the created-dataset response.

        Raises:
            requests.HTTPError: If the server responds with a 4xx/5xx status.
        """
        url = (f"{self.base_url}/api/{self.tenant_id}/{self.project_id}"
               f"/dataset/template/{template_id}")
        payload = {
            'datasetName': name,
            'description': description,
            'customizations': customizations or {}
        }
        response = requests.post(url, json=payload, headers=self.headers,
                                 timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_templates(self):
        """Get available dataset templates.

        Returns:
            Parsed JSON body containing the template list.

        Raises:
            requests.HTTPError: If the server responds with a 4xx/5xx status.
        """
        url = f"{self.base_url}/api/{self.tenant_id}/{self.project_id}/dataset/templates"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def build_standard_columns(self, additional_columns=None):
        """Build the standard process-mining columns.

        Args:
            additional_columns: Optional list of extra column dicts appended
                after the four standard columns; the input list itself is
                not mutated.

        Returns:
            A new list of column definition dicts (case id, activity,
            timestamp, resource, plus any additional columns).
        """
        columns = [
            {
                'name': 'CaseID',
                'type': 'string',
                'role': 'case_id',
                'required': True,
                'description': 'Unique process instance identifier'
            },
            {
                'name': 'Activity',
                'type': 'string',
                'role': 'activity',
                'required': True,
                'description': 'Process activity name'
            },
            {
                'name': 'Timestamp',
                'type': 'datetime',
                'role': 'timestamp',
                'required': True,
                'format': 'ISO8601',
                'description': 'Activity occurrence time'
            },
            {
                'name': 'Resource',
                'type': 'string',
                'role': 'resource',
                'required': False,
                'description': 'Performing resource'
            }
        ]
        if additional_columns:
            columns.extend(additional_columns)
        return columns
# Example usage
creator = DatasetCreator(
    'https://your-mindzie-instance.com',
    'tenant-guid',
    'project-guid',
    'your-auth-token'
)

# Create dataset with custom columns: two extra case/event attributes
# appended after the four standard process-mining columns.
additional_cols = [
    {
        'name': 'Priority',
        'type': 'string',
        'role': 'attribute',
        'required': False,
        'description': 'Case priority level'
    },
    {
        'name': 'Cost',
        'type': 'decimal',
        'role': 'attribute',
        'required': False,
        'description': 'Activity cost'
    }
]
columns = creator.build_standard_columns(additional_cols)
dataset = creator.create_dataset(
    'Support Ticket Process',
    'Customer support ticket handling events',
    columns,
    {
        'source': 'ServiceDesk System',
        # astimezone() makes the timestamp timezone-aware so the UTC offset
        # is included, matching the ISO 8601 timestamps shown elsewhere in
        # the API (naive datetime.now().isoformat() omits the offset).
        'extractionDate': datetime.now().astimezone().isoformat(),
        'department': 'Customer Support'
    }
)
print(f"Created dataset: {dataset['datasetId']}")
print(f"Columns defined: {len(dataset['columns'])}")

# List available templates
templates = creator.get_templates()
print(f"\nAvailable templates: {len(templates['templates'])}")
for template in templates['templates']:
    print(f"- {template['name']}: {template['description']}")