Creation

Create New Datasets

Create new datasets, configure data sources, and set up column mappings for process mining analysis.

Create New Dataset

POST /api/{tenantId}/{projectId}/dataset

Creates a new dataset container with metadata and column definitions. This establishes the structure for subsequent data import operations.

Parameters

| Parameter | Type | Location | Description |
| --- | --- | --- | --- |
| tenantId | GUID | Path | The tenant identifier |
| projectId | GUID | Path | The project identifier |

Request Body

{
  "datasetName": "Purchase Order Process Events",
  "description": "Event log data from the procurement system",
  "datasetType": "EventLog",
  "columns": [
    {
      "name": "CaseID",
      "type": "string",
      "role": "case_id",
      "required": true,
      "description": "Unique identifier for each purchase order"
    },
    {
      "name": "Activity",
      "type": "string",
      "role": "activity",
      "required": true,
      "description": "Process activity name"
    },
    {
      "name": "Timestamp",
      "type": "datetime",
      "role": "timestamp",
      "required": true,
      "format": "ISO8601",
      "description": "When the activity occurred"
    },
    {
      "name": "Resource",
      "type": "string",
      "role": "resource",
      "required": false,
      "description": "Person or system performing the activity"
    },
    {
      "name": "Amount",
      "type": "decimal",
      "role": "attribute",
      "required": false,
      "description": "Purchase order amount"
    }
  ],
  "metadata": {
    "source": "SAP ERP System",
    "extractionDate": "2024-01-15T10:30:00Z",
    "dataOwner": "Procurement Team"
  }
}

Response

{
  "datasetId": "550e8400-e29b-41d4-a716-446655440000",
  "datasetName": "Purchase Order Process Events",
  "description": "Event log data from the procurement system",
  "datasetType": "EventLog",
  "status": "Created",
  "columns": [
    {
      "columnId": "col-001",
      "name": "CaseID",
      "type": "string",
      "role": "case_id",
      "required": true,
      "description": "Unique identifier for each purchase order"
    }
  ],
  "rowCount": 0,
  "caseCount": 0,
  "activityCount": 0,
  "dateCreated": "2024-01-15T10:30:00Z",
  "createdBy": "user123"
}

Create Dataset from Template

POST /api/{tenantId}/{projectId}/dataset/template/{templateId}

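Creates a new dataset using a predefined template with standard column mappings for common process types. The {templateId} path parameter is the templateId of one of the templates returned by the List Dataset Templates endpoint.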

Request Body

{
  "datasetName": "Invoice Processing Events",
  "description": "Accounts payable process event log",
  "customizations": {
    "additionalColumns": [
      {
        "name": "InvoiceAmount",
        "type": "decimal",
        "role": "attribute",
        "description": "Invoice total amount"
      }
    ],
    "metadata": {
      "source": "Finance System",
      "department": "Accounts Payable"
    }
  }
}

Response

Returns the created dataset object with template columns and any customizations applied.

List Dataset Templates

GET /api/{tenantId}/{projectId}/dataset/templates

Retrieves available dataset templates with predefined column structures for common business processes.

Response

{
  "templates": [
    {
      "templateId": "template-001",
      "name": "Standard Event Log",
      "description": "Basic event log with case, activity, and timestamp",
      "category": "General",
      "columns": [
        {
          "name": "CaseID",
          "type": "string",
          "role": "case_id",
          "required": true
        },
        {
          "name": "Activity",
          "type": "string",
          "role": "activity",
          "required": true
        },
        {
          "name": "Timestamp",
          "type": "datetime",
          "role": "timestamp",
          "required": true
        }
      ]
    },
    {
      "templateId": "template-002",
      "name": "Purchase-to-Pay Process",
      "description": "Standard P2P process with supplier and cost data",
      "category": "Finance",
      "columns": [
        {
          "name": "PurchaseOrderID",
          "type": "string",
          "role": "case_id",
          "required": true
        },
        {
          "name": "ActivityName",
          "type": "string",
          "role": "activity",
          "required": true
        },
        {
          "name": "EventTime",
          "type": "datetime",
          "role": "timestamp",
          "required": true
        },
        {
          "name": "Supplier",
          "type": "string",
          "role": "attribute",
          "required": false
        },
        {
          "name": "Amount",
          "type": "decimal",
          "role": "attribute",
          "required": false
        }
      ]
    }
  ]
}

Column Types and Roles

Use the following column roles, data types, and validation rules to structure a dataset correctly:

Required Roles

Essential columns for process mining analysis.

  • case_id: Unique process instance identifier
  • activity: Name of the process step
  • timestamp: When the activity occurred

Optional Roles

Additional data for enhanced analysis.

  • resource: Person or system performing activity
  • attribute: Case or event attributes
  • lifecycle: Activity lifecycle stage

Data Types

Supported column data types.

  • string: Text data
  • decimal: Numeric values
  • datetime: Date and time values
  • boolean: True/false values

Validation Rules

Data quality and validation options.

  • required: Non-null validation
  • format: Data format validation
  • constraints: Value range limits

JavaScript Example: Dataset Creation

/**
 * Small client for the dataset-creation endpoints:
 *   POST /api/{tenantId}/{projectId}/dataset
 *   POST /api/{tenantId}/{projectId}/dataset/template/{templateId}
 *   GET  /api/{tenantId}/{projectId}/dataset/templates
 */
class DatasetCreator {
  /**
   * @param {string} baseUrl   Base URL of the instance (a trailing slash is tolerated).
   * @param {string} tenantId  Tenant identifier used in the request path.
   * @param {string} projectId Project identifier used in the request path.
   * @param {string} token     Bearer token sent in the Authorization header.
   */
  constructor(baseUrl, tenantId, projectId, token) {
    this.baseUrl = baseUrl;
    this.tenantId = tenantId;
    this.projectId = projectId;
    this.headers = {
      'Authorization': `Bearer ${token}`,
      'Content-Type': 'application/json'
    };
  }

  /**
   * Build the URL of a dataset endpoint for this tenant/project.
   * Every path segment is percent-encoded so an identifier containing
   * reserved characters cannot alter the request path.
   *
   * @param {...string} segments Extra path segments after ".../dataset".
   * @returns {string} The absolute endpoint URL.
   */
  _datasetUrl(...segments) {
    // Drop trailing slashes so "https://host/" does not produce "https://host//api".
    const base = String(this.baseUrl).replace(/\/+$/, '');
    const path = [this.tenantId, this.projectId, 'dataset', ...segments]
      .map(segment => encodeURIComponent(segment))
      .join('/');
    return `${base}/api/${path}`;
  }

  /**
   * Send a request with the client's headers and parse the JSON response.
   *
   * @param {string} url       Absolute endpoint URL.
   * @param {object} [options] Extra fetch options (method, body).
   * @returns {Promise<object>} The parsed JSON body of a successful response.
   * @throws {Error} When the response status is not 2xx; the error carries
   *                 `status` and `body` properties.
   */
  async _request(url, options = {}) {
    const response = await fetch(url, { ...options, headers: this.headers });

    // fetch() only rejects on network failure; HTTP error statuses must be checked explicitly.
    if (!response.ok) {
      const body = await response.text().catch(() => '');
      const suffix = body ? ` - ${body}` : '';
      const error = new Error(
        `Request failed: ${response.status} ${response.statusText}${suffix}`
      );
      error.status = response.status;
      error.body = body;
      throw error;
    }

    return response.json();
  }

  /**
   * Create a new dataset from a full configuration object.
   *
   * @param {object} datasetConfig Request body (datasetName, description,
   *                               datasetType, columns, metadata).
   * @returns {Promise<object>} The created dataset as returned by the API.
   */
  async createDataset(datasetConfig) {
    return this._request(this._datasetUrl(), {
      method: 'POST',
      body: JSON.stringify(datasetConfig)
    });
  }

  /**
   * Create a new dataset from a predefined template.
   *
   * @param {string} templateId Identifier of the template to use.
   * @param {object} config     Request body (datasetName, description, customizations).
   * @returns {Promise<object>} The created dataset as returned by the API.
   */
  async createFromTemplate(templateId, config) {
    return this._request(this._datasetUrl('template', templateId), {
      method: 'POST',
      body: JSON.stringify(config)
    });
  }

  /**
   * Retrieve the available dataset templates.
   *
   * @returns {Promise<object>} The parsed response body.
   */
  async getTemplates() {
    return this._request(this._datasetUrl('templates'));
  }

  /**
   * Build the standard column definitions: the case_id, activity and
   * timestamp columns (required) plus an optional resource column.
   *
   * @returns {object[]} A fresh array of column definitions on every call.
   */
  buildStandardColumns() {
    return [
      {
        name: 'CaseID',
        type: 'string',
        role: 'case_id',
        required: true,
        description: 'Unique process instance identifier'
      },
      {
        name: 'Activity',
        type: 'string',
        role: 'activity',
        required: true,
        description: 'Process activity name'
      },
      {
        name: 'Timestamp',
        type: 'datetime',
        role: 'timestamp',
        required: true,
        format: 'ISO8601',
        description: 'Activity occurrence time'
      },
      {
        name: 'Resource',
        type: 'string',
        role: 'resource',
        required: false,
        description: 'Performing resource'
      }
    ];
  }
}

// Usage example
const creator = new DatasetCreator(
  'https://your-mindzie-instance.com',
  'tenant-guid',
  'project-guid',
  'your-token'
);

// Create a custom dataset: the standard columns plus one extra attribute column
const datasetConfig = {
  datasetName: 'Order Fulfillment Process',
  description: 'E-commerce order processing events',
  datasetType: 'EventLog',
  columns: creator.buildStandardColumns().concat([
    {
      name: 'OrderValue',
      type: 'decimal',
      role: 'attribute',
      required: false,
      description: 'Order total amount'
    }
  ]),
  metadata: {
    source: 'E-commerce Platform',
    extractionDate: new Date().toISOString()
  }
};

creator.createDataset(datasetConfig)
  .then(result => {
    console.log(`Created dataset: ${result.datasetId}`);
    console.log(`Name: ${result.datasetName}`);
  })
  .catch(error => {
    // Without a rejection handler, a network or response-parsing failure
    // would surface as an unhandled promise rejection.
    console.error(`Failed to create dataset: ${error.message}`);
  });

Python Example: Dataset Creation

import requests
import json
from datetime import datetime

class DatasetCreator:
    """Small client for the dataset-creation endpoints.

    Endpoints used:
        POST /api/{tenantId}/{projectId}/dataset
        POST /api/{tenantId}/{projectId}/dataset/template/{templateId}
        GET  /api/{tenantId}/{projectId}/dataset/templates
    """

    def __init__(self, base_url, tenant_id, project_id, token, timeout=30):
        """Store connection settings.

        Args:
            base_url: Base URL of the instance (a trailing slash is tolerated).
            tenant_id: Tenant identifier used in the request path.
            project_id: Project identifier used in the request path.
            token: Bearer token sent in the Authorization header.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.base_url = base_url
        self.tenant_id = tenant_id
        self.project_id = project_id
        # requests waits indefinitely unless a timeout is passed explicitly.
        self.timeout = timeout
        self.headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        }

    def _dataset_url(self, *segments):
        """Return the URL of a dataset endpoint for this tenant/project.

        Each path segment is percent-encoded so an identifier containing
        reserved characters cannot alter the request path.
        """
        from urllib.parse import quote  # local import keeps the block self-contained

        parts = [self.tenant_id, self.project_id, 'dataset', *segments]
        path = '/'.join(quote(str(part), safe='') for part in parts)
        # Strip trailing slashes so "https://host/" does not yield "https://host//api".
        return f"{self.base_url.rstrip('/')}/api/{path}"

    @staticmethod
    def _parse(response):
        """Raise on an HTTP error status, otherwise return the decoded JSON body."""
        response.raise_for_status()
        return response.json()

    def create_dataset(self, name, description, columns, metadata=None):
        """Create a new dataset with custom configuration.

        Args:
            name: Value sent as ``datasetName``.
            description: Value sent as ``description``.
            columns: List of column definition dicts.
            metadata: Optional metadata dict; an empty dict is sent when omitted.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        payload = {
            'datasetName': name,
            'description': description,
            'datasetType': 'EventLog',
            'columns': columns,
            'metadata': metadata or {}
        }

        response = requests.post(
            self._dataset_url(),
            json=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        return self._parse(response)

    def create_from_template(self, template_id, name, description, customizations=None):
        """Create a dataset from a template.

        Args:
            template_id: Identifier of the template to use.
            name: Value sent as ``datasetName``.
            description: Value sent as ``description``.
            customizations: Optional customizations dict; an empty dict is
                sent when omitted.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        payload = {
            'datasetName': name,
            'description': description,
            'customizations': customizations or {}
        }

        response = requests.post(
            self._dataset_url('template', template_id),
            json=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        return self._parse(response)

    def get_templates(self):
        """Get available dataset templates.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        response = requests.get(
            self._dataset_url('templates'),
            headers=self.headers,
            timeout=self.timeout,
        )
        return self._parse(response)

    def build_standard_columns(self, additional_columns=None):
        """Build standard process mining columns.

        Args:
            additional_columns: Optional list of extra column definitions
                appended after the standard ones.

        Returns:
            A new list holding the case_id, activity, timestamp and resource
            column definitions followed by any additional columns.
        """
        columns = [
            {
                'name': 'CaseID',
                'type': 'string',
                'role': 'case_id',
                'required': True,
                'description': 'Unique process instance identifier'
            },
            {
                'name': 'Activity',
                'type': 'string',
                'role': 'activity',
                'required': True,
                'description': 'Process activity name'
            },
            {
                'name': 'Timestamp',
                'type': 'datetime',
                'role': 'timestamp',
                'required': True,
                'format': 'ISO8601',
                'description': 'Activity occurrence time'
            },
            {
                'name': 'Resource',
                'type': 'string',
                'role': 'resource',
                'required': False,
                'description': 'Performing resource'
            }
        ]

        if additional_columns:
            columns.extend(additional_columns)

        return columns

# Example usage
creator = DatasetCreator(
    'https://your-mindzie-instance.com',
    'tenant-guid',
    'project-guid',
    'your-auth-token'
)

# Create dataset with custom columns
additional_cols = [
    {
        'name': 'Priority',
        'type': 'string',
        'role': 'attribute',
        'required': False,
        'description': 'Case priority level'
    },
    {
        'name': 'Cost',
        'type': 'decimal',
        'role': 'attribute',
        'required': False,
        'description': 'Activity cost'
    }
]

columns = creator.build_standard_columns(additional_cols)

dataset = creator.create_dataset(
    'Support Ticket Process',
    'Customer support ticket handling events',
    columns,
    {
        'source': 'ServiceDesk System',
        # astimezone() attaches the local UTC offset; a naive datetime.now()
        # would serialize without any zone and be ambiguous to the receiver.
        'extractionDate': datetime.now().astimezone().isoformat(),
        'department': 'Customer Support'
    }
)

print(f"Created dataset: {dataset['datasetId']}")
print(f"Columns defined: {len(dataset['columns'])}")

# List available templates
templates = creator.get_templates()
print(f"\nAvailable templates: {len(templates['templates'])}")
for template in templates['templates']:
    print(f"- {template['name']}: {template['description']}")
An error has occurred. This application may no longer respond until reloaded. Reload ??