{"templateId":"markdown","sharedDataIds":{"sidebar":"sidebar-sidebars.yaml"},"props":{"metadata":{"markdoc":{"tagList":[]},"redocly_category":"Integrations","type":"markdown"},"seo":{"title":"Embulk Bulk Import From Aws S3","description":"Treasure Data Product Documentation · Collect and Unify · Segment and Activate · Experiment and Analyze · Decisioning Automate with AI Scale and Trust.","siteUrl":"https://docs.treasuredata.com","lang":"en-US","llmstxt":{"hide":false,"sections":[{"title":"Table of contents","includeFiles":["**/*"],"excludeFiles":[]}],"excludeFiles":[]}},"dynamicMarkdocComponents":[],"compilationErrors":[],"ast":{"$$mdtype":"Tag","name":"article","attributes":{},"children":[{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"embulk-bulk-import-from-aws-s3","__idx":0},"children":["Embulk Bulk Import From Aws S3"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["You can import files from your AWS S3 bucket to Treasure Data using the embulk-input-s3 input plugin."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Continue to the following topics:"]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"prerequisites","__idx":1},"children":["Prerequisites"]},{"$$mdtype":"Tag","name":"ul","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":["Basic knowledge of Treasure Data."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["Basic knowledge of ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"http://www.embulk.org/docs/"},"children":["Embulk"]},"."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["Follow the instructions in ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"/products/customer-data-platform/integration-hub/batch/import/bulk-data-import#installing-bulk-data-import"},"children":["Installing Bulk Data 
Import"]},"."]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"http://www.embulk.org/docs/"},"children":["Embulk and embulk-output-td"]}," plugin installed on your machine."]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"install-embulk-input-s3-plugin","__idx":2},"children":["Install embulk-input-s3 Plugin"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["To install the embulk-input-s3 plugin, run the following command:"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"bash","header":{"controls":{"copy":{}}},"source":"embulk gem install embulk-input-s3\n","lang":"bash"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"create-a-seed-configuration-file","__idx":3},"children":["Create a Seed Configuration File"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Using your favorite text editor, create an Embulk config file (for example, seed.yml) defining the input (S3) and output (TD) parameters. 
Example:"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"yaml","header":{"controls":{"copy":{}}},"source":"in:\n  type: s3\n  bucket: s3bucket\n  path_prefix: path/to/sample_file    # path of *.csv or *.tsv file on your s3 bucket\n  access_key_id: xxxxxxxxxx\n  secret_access_key: xxxxxxxxxxx\nout:\n  type: td\n  apikey: xxxxxxxxxxxx\n  endpoint: api.treasuredata.com\n  database: dbname\n  table: tblname\n  time_column: datecolumn\n  mode: replace\n  #by default mode: append is used, if not defined.\n  # Imported records are appended to the target table with this mode.\n  #mode: replace, replaces existing target table\n  default_timestamp_format: '%d/%m/%Y'\n","lang":"yaml"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["For further details about additional parameters available for embulk-input-s3, see ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://github.com/embulk/embulk-input-s3"},"children":["Embulk Input S3"]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"guess-fields-generate-loadyml","__idx":4},"children":["Guess Fields (Generate load.yml)"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["The Embulk guess option uses ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["seed.yml"]}," to read the target file, automatically guesses the column types/settings, and creates a new file ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["load.yml"]}," with this information."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"bash","header":{"controls":{"copy":{}}},"source":"embulk guess seed.yml -o load.yml\n","lang":"bash"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Add the \"auto_create_table: true\" parameter to the load.yml, so that tables that do not exist are created automatically."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["This is a sample of the auto_create_table parameter in a 
.yml file."]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"yaml","header":{"controls":{"copy":{}}},"source":"out:\n  type: td\n  apikey: your apikey\n  endpoint: api.treasuredata.com\n  database: dbname\n  table: tblname\n  time_column: created_at\n  auto_create_table: true\n  mode: append\n","lang":"yaml"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["You must create the database and table in TD prior to executing the load job."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["If you either: 1) must add a database or 2) do not add the auto_create_table parameter in a .yml file and must add a table, run the following TD commands:"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"bash","header":{"controls":{"copy":{}}},"source":"td database:create dbname\ntd table:create dbname tblname\n","lang":"bash"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["You can also create the database and table using Treasure Console."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["You can preview the data using the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["embulk preview load.yml"]}," command. If any of the column types or data seem incorrect, you may edit the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["load.yml"]}," file directly and preview again to verify. 
If the ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["guess"]}," option doesn’t yield satisfactory results, you may change parameters in ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["load.yml"]}," manually, according to your requirements, using ",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"http://www.embulk.org/docs/built-in.html#csv-parser-plugin"},"children":["CSV/TSV parser plugin options"]},"."]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Create the database and table in TD, using the Treasure Console or from the command line:"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"bash","header":{"controls":{"copy":{}}},"source":"$ td database:create dbname\n$ td table:create dbname tblname\n","lang":"bash"},"children":[]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"execute-load-job","__idx":5},"children":["Execute Load Job"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Run the import job using the following command:"]},{"$$mdtype":"Tag","name":"CodeBlock","attributes":{"data-language":"bash","header":{"controls":{"copy":{}}},"source":"embulk run load.yml\n","lang":"bash"},"children":[]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["It may take a few minutes to several hours for the job to complete, depending on the size of the data."]}]},"headings":[{"value":"Embulk Bulk Import From Aws S3","id":"embulk-bulk-import-from-aws-s3","depth":1},{"value":"Prerequisites","id":"prerequisites","depth":2},{"value":"Install embulk-input-s3 Plugin","id":"install-embulk-input-s3-plugin","depth":2},{"value":"Create a Seed Configuration File","id":"create-a-seed-configuration-file","depth":2},{"value":"Guess Fields (Generate load.yml)","id":"guess-fields-generate-loadyml","depth":2},{"value":"Execute Load Job","id":"execute-load-job","depth":2}],"frontmatter":{"seo":{"title":"Embulk Bulk Import From Aws 
S3"}},"lastModified":"2026-06-02T03:56:21.000Z","pagePropGetterError":{"message":"","name":""}},"slug":"/int/embulk-bulk-import-from-aws-s3","userData":{"isAuthenticated":false,"teams":["anonymous"]},"isPublic":true}