{"templateId":"markdown","sharedDataIds":{"sidebar":"sidebar-sidebars.yaml"},"props":{"metadata":{"markdoc":{"tagList":[]},"redocly_category":"Products","product_name":"Machine Learning","type":"markdown"},"seo":{"title":"ML Datasets","description":"Treasure Data Product Documentation · Collect and Unify · Segment and Activate · Experiment and Analyze · Decisioning Automate with AI Scale and Trust.","siteUrl":"https://docs.treasuredata.com","lang":"en-US","llmstxt":{"hide":false,"sections":[{"title":"Table of contents","includeFiles":["**/*"],"excludeFiles":[]}],"excludeFiles":[]}},"dynamicMarkdocComponents":[],"compilationErrors":[],"ast":{"$$mdtype":"Tag","name":"article","attributes":{},"children":[{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"ml-datasets","__idx":0},"children":["ML Datasets"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["This notebook generates sample ML datasets in the specified output database."]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":3,"id":"workflow-example","__idx":1},"children":["Workflow Example"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Find a sample workflow ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://github.com/treasure-data/treasure-boxes/blob/automl/machine-learning-box/automl/ml_datasets.dig"},"children":["here in Treasure Boxes"]},"."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"yaml","header":{"controls":{"copy":{}}},"source":"+load_datasets:\n  ipynb>:\n    notebook: ml_datasets\n    output_database: ml_datasets\n    datasets: all  
\n","lang":"yaml"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":3,"id":"parameters","__idx":2},"children":["Parameters"]},{"$$mdtype":"Tag","name":"div","attributes":{"className":"md-table-wrapper"},"children":[{"$$mdtype":"Tag","name":"table","attributes":{"className":"md"},"children":[{"$$mdtype":"Tag","name":"thead","attributes":{},"children":[{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Parameter name"},"children":["Parameter name"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Parameter on Console"},"children":["Parameter on Console"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Description"},"children":["Description"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Default Value"},"children":["Default Value"]}]}]},{"$$mdtype":"Tag","name":"tbody","attributes":{},"children":[{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["docker.task_mem"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Docker Task Mem"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Task memory size. 
Available values are 64g, 128g (default), 256g, 384g, or 512g depending on your contracted tiers."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["128g"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["datasets"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Datasets"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Either \"all\" or a comma-separated list of the datasets to set up."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["all"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["output_database"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Output Database"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Name of the database in which the datasets are set up."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["ml_datasets"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["replace_if_exists"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Replace If Exists"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Replace a table if it already exists. 
Set to false by default."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["false"]}]}]}]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":3,"id":"dataset-description","__idx":3},"children":["Dataset Description"]},{"$$mdtype":"Tag","name":"div","attributes":{"className":"md-table-wrapper"},"children":[{"$$mdtype":"Tag","name":"table","attributes":{"className":"md"},"children":[{"$$mdtype":"Tag","name":"thead","attributes":{},"children":[{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Dataset"},"children":["Dataset"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Description"},"children":["Description"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Associated Tasks"},"children":["Associated Tasks"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Target Column"},"children":["Target Column"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Number of Columns"},"children":["Number of Columns"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"Number of Rows"},"children":["Number of Rows"]}]}]},{"$$mdtype":"Tag","name":"tbody","attributes":{},"children":[{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://auto.gluon.ai/stable/tutorials/tabular/tabular-indepth.html"},"children":["gluon"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["AutoGluon example dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Binary / Multiclass classification"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["class (binary), occupation (multiclass)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["15"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["39,073 (train), 9,769 
(test)"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.kaggle.com/datasets/ruthgn/bank-marketing-data-set"},"children":["bank_marketing"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Bank marketing dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Predict if the client will subscribe to a term deposit."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Binary classification"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["y"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["21"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["28,831 (train), 12,357 (test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://archive.ics.uci.edu/dataset/603/in+vehicle+coupon+recommendation"},"children":["vehicle_coupon"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Vehicle coupon recommendation dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Recommend a coupon to a driver in different 
scenarios."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Multiclass classification"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["coupon"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["26"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["8,878 (train), 3,806 (test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://archive.ics.uci.edu/ml/datasets/Online+Retail"},"children":["online_retail"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Online retail transactional dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Predict LTV score for each customer."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Regression (CLTV prediction), RFM"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["cltv"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["11"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["2,230 (train), 956 (test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.kaggle.com/blastchar/telco-customer-churn/data"},"children":["telco_churn"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Telco churn event dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Binary classification (Churn 
prediction)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["churn"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["21"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["4,930 (train), 2,113 (test)"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset"},"children":["california_house"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["House price dataset of California."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Predict house prices."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Regression"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["median_house_value"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["10"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["14,448 (train), 6,192 (test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["transition_matrix"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Sample transition dataset of web 
access."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Analyze web access transitions."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Network Analysis"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["-"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["3"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["12"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.sktime.net/en/stable/api_reference/auto_generated/sktime.datasets.load_airline.html"},"children":["ts_airline"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Time-series airline passenger dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Forecast the number of passengers."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Time-series Forecasting (Univariate)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["number_of_airline_passengers"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["2"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["100 (train), 44 
(test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.kaggle.com/datasets/yogesh94/m4-forecasting-competition-dataset"},"children":["m4"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Quarterly time series of M4 dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Time-series Forecasting (Multivariate)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["v7 (or any v?)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["867"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["33,600 (train), 14,400 (test)"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["nba"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Next-Best-Action dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Next Best Action"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["-"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["6"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["43,196 (train), 12,829 (test)"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://dp6.github.io/Marketing-Attribution-Models/"},"children":["mta"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["DP6 dataset for marketing attribution models."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Multi-Touch 
Attribution"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["-"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["4"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["500,000"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://archive.ics.uci.edu/ml/datasets/dermatology"},"children":["dermatology"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Dermatology diseases dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Determine 6 types of Erythemato-Squamous disease."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Multi-class classification, Clustering"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["class"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["35"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["366"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud"},"children":["creditcard"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Credit card fraud 
dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Predict anonymized transactions as fraudulent or genuine."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Binary classification (Fraud detection)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["fraud"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["29"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["199,364 (train), 85,443 (test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"http://glaros.dtc.umn.edu/gkhome/views/cluto"},"children":["cluto"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Cluto dataset for clustering."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Clustering"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["class"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["3"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["10,000"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://archive.ics.uci.edu/dataset/31/covertype"},"children":["covtype"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Forestcover type 
dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Classification of pixels into 7 forest cover types."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Multiclass classification"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["target"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["55"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["406,708 (train), 174,304 (test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"http://qwone.com/~jason/20Newsgroups/"},"children":["20newsgroups"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["20 newsgroup documents dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["This dataset comes from data in 20 different newsgroups."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Multiclass classification"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["target"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["301"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["11,314 (train), 7,532 
(test)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["4,871 (imbalanced train)"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.kaggle.com/datasets/mkechinov/ecommerce-events-history-in-cosmetics-shop"},"children":["cosmetics_store"]}]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Cosmetics shop e-commerce events history dataset."]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["RFM analysis, Clustering"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["-"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["5"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["1,287,007"]}]}]}]}]}]},"headings":[{"value":"ML Datasets","id":"ml-datasets","depth":1},{"value":"Workflow Example","id":"workflow-example","depth":3},{"value":"Parameters","id":"parameters","depth":3},{"value":"Dataset Description","id":"dataset-description","depth":3}],"frontmatter":{"seo":{"title":"ML Datasets"}},"lastModified":"2026-01-27T10:05:25.000Z","pagePropGetterError":{"message":"","name":""}},"slug":"/products/customer-data-platform/machine-learning/automl/notebook-solutions/ml-datasets","userData":{"isAuthenticated":false,"teams":["anonymous"]},"isPublic":true}