{"templateId":"markdown","sharedDataIds":{"sidebar":"sidebar-sidebars.yaml"},"props":{"metadata":{"markdoc":{"tagList":["admonition"]},"redocly_category":"Guides","type":"markdown"},"seo":{"title":"pytd Quickstart","description":"Treasure Data Product Documentation · Collect and Unify · Segment and Activate · Experiment and Analyze · Decisioning Automate with AI Scale and Trust.","siteUrl":"https://docs.treasuredata.com","lang":"en-US","llmstxt":{"hide":false,"sections":[{"title":"Table of contents","includeFiles":["**/*"],"excludeFiles":[]}],"excludeFiles":[]}},"dynamicMarkdocComponents":[],"compilationErrors":[],"ast":{"$$mdtype":"Tag","name":"article","attributes":{},"children":[{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"pytd-quickstart","__idx":0},"children":["pytd Quickstart"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["pytd"]}," provides user-friendly interfaces to Treasure Data’s ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://docs.treasuredata.com/tools/cli-and-sdks/td-toolbelt"},"children":["REST APIs"]},", ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://docs.treasuredata.com/products/customer-data-platform/data-workbench/queries/trino/quickstart"},"children":["Trino query engine"]},", and ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://www.slideshare.net/treasure-data/td-techplazma"},"children":["Plazma primary storage"]},". 
The seamless connection allows your Python code to efficiently read and write a large volume of data to and from Treasure Data."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["We recommend using IPython with this library for the best experience during development."]},{"$$mdtype":"Tag","name":"Admonition","attributes":{"type":"info","name":"Info"},"children":[{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Treasure Data is no longer accepting new users for the Plazma Public API."]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"setup","__idx":1},"children":["Setup"]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"installing","__idx":2},"children":["Installing"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"shell","header":{"controls":{"copy":{}}},"source":"pip install pytd\n","lang":"shell"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"initializing-client","__idx":3},"children":["Initializing Client"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Set your ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"/products/my-settings/getting-your-api-keys"},"children":["Treasure API key"]}," and ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"/apis/endpoints/endpoints"},"children":["endpoint"]}," to the environment variables, ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["TD_API_KEY"]}," and ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["TD_API_SERVER"]},", respectively, and create a client instance."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["You can also optionally set the query engine to use for all queries. 
The default query engine is Trino."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"python","header":{"controls":{"copy":{}}},"source":"import pytd\n\nclient = pytd.Client(database='sample_datasets')\n\n# or, hard-code your API key, endpoint, and/or query engine:\npytd.Client(apikey='X/XXX',\n            endpoint='https://api.treasuredata.com/',\n            database='sample_datasets',\n            default_engine='presto')\n","lang":"python"},"children":[]},{"$$mdtype":"Tag","name":"Admonition","attributes":{"type":"info","name":"Info"},"children":[{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Note that the default engine is set to ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["presto"]}," (Trino). To use Hive, you can either:"]},{"$$mdtype":"Tag","name":"ol","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":["Set the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["default_engine='hive'"]}," for all queries at initialization."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["Pass the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["engine='hive'"]}," parameter at run time."]}]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"basic-use","__idx":4},"children":["Basic Use"]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"running-a-query","__idx":5},"children":["Running a Query"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["All queries in Treasure Data run on either Trino or Hive. 
You can specify the query engine at client initialization or per query at run time."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["The default engine is Trino."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"python","header":{"controls":{"copy":{}}},"source":"client.query('select symbol, count(1) as cnt from nasdaq group by 1 order by 1')\n# {'columns': ['symbol', 'cnt'], 'data': [['AAIT', 590], ['AAL', 82], ['AAME', 9252], ..., ['ZUMZ', 2364]]}\n","lang":"python"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["When you want to run a Hive query, you should pass ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["hive"]}," to the engine option."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"python","header":{"controls":{"copy":{}}},"source":"client.query('select hivemall_version()', engine='hive')\n# {'columns': ['_c0'], 'data': [['0.6.0-SNAPSHOT-201901-r01']]} (as of Feb, 2019)\n","lang":"python"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"loading-a-dataframe-from-treasure-data","__idx":6},"children":["Loading a DataFrame from Treasure Data"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["When pulling data from Treasure Data, all data must be represented as ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["pandas.DataFrame"]},". 
To do this we will use functions from the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["pandas_td"]}," sub library."]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":3,"id":"load-table","__idx":7},"children":["Load Table"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["To read a Treasure Data table into a pandas DataFrame use the pandas ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://pytd-doc.readthedocs.io/en/latest/pandas_td.html?highlight=read_td_table#pytd.pandas_td.read_td_table"},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["read_td_table"]}," function"]},"."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Note that ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["engine"]}," is a Class returned by the ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://pytd-doc.readthedocs.io/en/latest/pandas_td.html#pytd.pandas_td.create_engine"},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["create_engine"]}," function"]},"."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"python","header":{"controls":{"copy":{}}},"source":"import pytd.pandas_td as td\n\n# Assumes TD_API_KEY and TD_API_SERVER env variables are set\nengine = td.create_engine(\"presto:my_db\")\n\ndf = td.read_td_table('table_name', engine, limit=10000)\n\n\n","lang":"python"},"children":[]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"python","header":{"controls":{"copy":{}}},"source":"import pytd.pandas_td as td\n\n# Declare API Key and Endpoint manually\ncon = td.connect(apikey='XXX', endpoint=\"https://api.treasuredata.com\")\nengine = td.create_engine(\"presto:my_db\", con=con)\n\ndf = td.read_td_table('table_name', engine, limit=10000)\n","lang":"python"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":3,"id":"load-from-query","__idx":8},"children":["Load from 
Query"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["To run a query and load the output into a dataframe use the ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://pytd-doc.readthedocs.io/en/latest/pandas_td.html?highlight=read_td_query#pytd.pandas_td.read_td_query"},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["read_td_query"]}," function"]},"."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"python","header":{"controls":{"copy":{}}},"source":"query='SELECT foo FROM bar'\ndf = td.read_td_query(query,\n                      engine,\n                      index_col=None,\n                      parse_dates=None,\n                      distributed_join=False,\n                      params=None)\n","lang":"python"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["read_td_query"]}," takes the following parameters:"]},{"$$mdtype":"Tag","name":"ul","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":["query (",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["String"]},") — SQL string to be executed"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["engine — For example, ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["presto_engine"]}]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["index_col"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["parse_dates (",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["Array"]},", ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["none"]},") — For Array, column names must be given to parse as dates."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["distributed_join (",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["true"]},", ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["false"]},") — (Trino only) If true, 
distributed join is enabled. If false (default), broadcast join is used."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["params"]}]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["The ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["priority"]}," parameter can be set by using syntax similar to the following:"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"header":{"controls":{"copy":{}}},"source":"df_2 = td.read_td('SELECT foo FROM bar',\n                  engine,\n                  params={'priority':1})\n# Note priority is ordered highest 2,1,0,-1,-2 lowest\n"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"writing-dataframe-to-treasure-data","__idx":9},"children":["Writing DataFrame to Treasure Data"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["To write data to Treasure Data, use the ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://pytd-doc.readthedocs.io/en/latest/generated/pytd.Client.html?highlight=load_table_from_dataframe#pytd.Client.load_table_from_dataframe"},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["load_table_from_dataframe"]}," function"]}," to write a locally defined pandas DataFrame to a table."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"header":{"controls":{"copy":{}}},"source":"import pandas as pd\n\ndf = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 10]})\n\nclient.load_table_from_dataframe(df,\n                                'table_name',\n                                writer='bulk_import',\n                                if_exists='overwrite')\n"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":[{"$$mdtype":"Tag","name":"strong","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["writer"]}," parameter"]}]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["For the 
",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["writer"]}," parameter, pytd supports different ways to ingest data to Treasure Data:"]},{"$$mdtype":"Tag","name":"ol","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["bulk_import"]}," - Convert data into a CSV file and upload in the batch fashion."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["insert_into"]}," - Insert every single row in ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["DataFrame"]}," by issuing an ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["INSERT INTO"]}," query through the Trino query engine. Recommended only for a small volume of data."]}]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Characteristics of each of these methods can be summarized as follows:"]},{"$$mdtype":"Tag","name":"div","attributes":{"className":"md-table-wrapper"},"children":[{"$$mdtype":"Tag","name":"table","attributes":{"className":"md"},"children":[{"$$mdtype":"Tag","name":"thead","attributes":{},"children":[{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"th","attributes":{"data-label":""},"children":[]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"bulk_import"},"children":["bulk_import"]},{"$$mdtype":"Tag","name":"th","attributes":{"data-label":"insert_into"},"children":["insert_into"]}]}]},{"$$mdtype":"Tag","name":"tbody","attributes":{},"children":[{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Scalable against data volume"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["✓"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Write 
performance for larger data"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Memory efficient"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["✓"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["✓"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Disk efficient"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":[]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["✓"]}]},{"$$mdtype":"Tag","name":"tr","attributes":{},"children":[{"$$mdtype":"Tag","name":"td","attributes":{},"children":["Minimal package dependency"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["✓"]},{"$$mdtype":"Tag","name":"td","attributes":{},"children":["✓"]}]}]}]}]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":[{"$$mdtype":"Tag","name":"strong","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["if_exists"]}," parameter"]}]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["For the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["if_exists"]}," parameter, pytd supports 4 different behavior patterns."]},{"$$mdtype":"Tag","name":"ul","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["error"]},": raise an exception"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["overwrite"]},": drop it, recreate it, and insert data"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["append"]},": insert data (create if does not 
exist)"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["ignore"]},": do nothing"]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"advanced-use","__idx":10},"children":["Advanced Use"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["For more advanced examples see the ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://colab.research.google.com/drive/1ps_ChU-H2FvkeNlj1e1fcOebCt4ryN11"},"children":["sample code on Google Colaboratory"]},"."]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"further-reading","__idx":11},"children":["Further Reading"]},{"$$mdtype":"Tag","name":"ul","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"/tools/pytd/pytd-vs-td-client-python-vs-pandas-td"},"children":["Choosing between PyTD, td-client-python, and Pandas-TD"]}," - Different libraries for different needs."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://pytd-doc.readthedocs.io/en/latest/reference.html"},"children":["PyTreasure API Reference"]}]}]}]},"headings":[{"value":"pytd Quickstart","id":"pytd-quickstart","depth":1},{"value":"Setup","id":"setup","depth":1},{"value":"Installing","id":"installing","depth":2},{"value":"Initializing Client","id":"initializing-client","depth":2},{"value":"Basic Use","id":"basic-use","depth":1},{"value":"Running a Query","id":"running-a-query","depth":2},{"value":"Loading a DataFrame from Treasure Data","id":"loading-a-dataframe-from-treasure-data","depth":2},{"value":"Load Table","id":"load-table","depth":3},{"value":"Load from Query","id":"load-from-query","depth":3},{"value":"Writing DataFrame to Treasure Data","id":"writing-dataframe-to-treasure-data","depth":2},{"value":"Advanced 
Use","id":"advanced-use","depth":1},{"value":"Further Reading","id":"further-reading","depth":1}],"frontmatter":{"seo":{"title":"pytd Quickstart"}},"lastModified":"2026-06-01T09:09:59.000Z","pagePropGetterError":{"message":"","name":""}},"slug":"/tools/pytd","userData":{"isAuthenticated":false,"teams":["anonymous"]},"isPublic":true}