<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="https://cloudsqale.com/wp-sitemap.xsl" ?>
<!--
  Sitemap (sitemaps.org protocol 0.9) for cloudsqale.com.
  Each <url> pairs a page location (<loc>) with its last-modification
  timestamp (<lastmod>, ISO 8601 with an explicit +00:00 offset).
  <loc> and <lastmod> values must stay on a single line: whitespace inside
  them is part of the value.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url>
    <loc>https://cloudsqale.com/2018/09/25/collecting-s3-access-logs/</loc>
    <lastmod>2018-09-26T13:51:43+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/09/26/operation-timed-out-for-hive-query-in-hue-and-qubole/</loc>
    <lastmod>2018-09-26T13:40:08+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/10/05/simple-hive-queries-with-predicates-compressed-text-vs-orc-files/</loc>
    <lastmod>2018-10-05T12:20:25+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/10/08/s3-monitoring-step-1-bucket-size-and-number-of-objects/</loc>
    <lastmod>2018-10-08T16:48:27+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/10/09/s3-monitoring-step-2-read-operations/</loc>
    <lastmod>2018-10-10T14:43:30+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/10/10/s3-monitoring-step-3-read-operations-and-file-types/</loc>
    <lastmod>2018-10-18T10:55:59+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/10/18/s3-monitoring-4-read-operations-and-tables/</loc>
    <lastmod>2018-12-27T14:23:54+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/10/22/tez-internals-1-number-of-map-tasks/</loc>
    <lastmod>2018-10-22T15:41:56+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/11/12/tez-internals-2-number-of-map-tasks-for-large-orc-files-with-small-stripes-in-amazon-emr/</loc>
    <lastmod>2018-11-12T10:39:12+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/12/04/s3-writes-when-inserting-data-into-a-hive-table-in-amazon-emr/</loc>
    <lastmod>2018-12-04T17:46:56+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/12/17/orc-file-format-internals-creating-large-stripes-in-hive-tables/</loc>
    <lastmod>2018-12-27T10:14:01+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/12/24/orc-files-split-computation-hive-on-tez/</loc>
    <lastmod>2018-12-24T19:01:55+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2018/12/26/storage-tuning-for-mapped-json-conversion-to-orc-file-format-java-heap-issues-with-dictionary-encoding/</loc>
    <lastmod>2018-12-26T13:51:37+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/01/04/yarn-resource-manager-silent-restarts-java-heap-space-error-amazon-emr/</loc>
    <lastmod>2019-01-05T20:44:08+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/01/21/tez-memory-tuning-container-is-running-beyond-physical-memory-limits-solving-by-reducing-memory-settings/</loc>
    <lastmod>2019-09-02T08:12:30+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/01/25/reduce-number-of-output-files-for-skewed-data-order-in-apache-pig-sampler-and-weighted-range-partitioner-to-balance-reducers/</loc>
    <lastmod>2019-01-25T10:27:22+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/04/08/extremely-large-number-of-rdd-partitions-and-tasks-in-spark-on-amazon-emr/</loc>
    <lastmod>2019-04-08T13:32:40+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/04/19/yarn-memory-under-utilization-running-low-memory-instances-c4-xlarge-i-e/</loc>
    <lastmod>2019-04-19T10:56:58+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/03/performance-of-min-max-functions-metadata-operations-and-partition-pruning-in-snowflake/</loc>
    <lastmod>2019-05-03T12:12:54+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/04/snowflake-remote-disk-i-o-local-disk-cache-capacity-utilization-and-transfer-rate/</loc>
    <lastmod>2019-05-04T15:39:47+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/06/snowflake-reloading-data-from-stage-truncate-delete-copy-and-transactions/</loc>
    <lastmod>2019-05-06T13:29:11+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/14/snowflake-monitoring-data-ingestion-using-query_history-and-copy_history-single-large-file-vs-multiple-small-files/</loc>
    <lastmod>2019-05-14T10:39:10+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/11/snowflake-data-ingestion-cluster-utilization-idle-time-and-compute-cost/</loc>
    <lastmod>2019-05-11T12:27:59+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/15/hadoop-yarn-collecting-utilization-metrics-from-multiple-clusters/</loc>
    <lastmod>2019-05-15T11:10:38+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/20/amazon-emr-monitoring-auto-scaling-using-instance-controller-logs/</loc>
    <lastmod>2019-05-21T10:13:36+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/23/amazon-emr-recovering-unhealthy-nodes-with-emr-services-down/</loc>
    <lastmod>2019-05-23T08:00:44+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/05/23/amazon-emr-recovering-ghost-nodes/</loc>
    <lastmod>2019-05-23T19:56:04+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/08/27/amazon-emr-downscaling-and-ghost-impaired-nodes/</loc>
    <lastmod>2019-08-27T17:46:19+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/09/04/tuning-hadoop-yarn-boosting-memory-settings-beyond-the-limits-to-increase-cluster-capacity-and-utilization/</loc>
    <lastmod>2019-09-05T15:42:26+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/09/11/hadoop-yarn-calculating-per-second-utilization-of-cluster-using-resource-manager-logs/</loc>
    <lastmod>2019-09-11T05:51:27+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/10/18/sqoop-import-from-amazon-rds-read-replica-error-canceling-statement-due-to-conflict-with-recovery/</loc>
    <lastmod>2019-10-19T14:22:35+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/11/01/apache-hive-pig-on-tez-long-running-tasks-and-their-failed-attempts-analyzing-performance-and-finding-bottlenecks-insufficient-parallelism-using-application-master-logs/</loc>
    <lastmod>2019-11-01T13:46:13+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/11/16/apache-hive-monitoring-progress-of-long-running-reducers-hive-log-every-n-records-option/</loc>
    <lastmod>2019-11-16T14:08:03+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/11/20/apache-hive-on-tez-quick-on-the-fly-profiling-of-long-running-tasks-using-jstack-probes-and-flame-graphs/</loc>
    <lastmod>2019-11-20T16:37:26+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/11/28/presto-troubleshooting-query-exceeded-per-node-total-memory-limit-resource_overcommit-query-max-total-memory-per-node-reserved-pool-disk-spill/</loc>
    <lastmod>2019-11-28T14:17:11+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/12/02/snowflake-micro-partitions-and-clustering-depth/</loc>
    <lastmod>2019-12-02T16:16:18+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/12/04/presto-query-auto-scaling-limitations-low-utilization-after-upscale-group-by-partitioned-stage-query-manager-required-workers/</loc>
    <lastmod>2019-12-04T14:28:36+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/12/04/presto-vs-hive-sla-risks-for-long-running-etl-failures-and-retries-due-to-node-loss/</loc>
    <lastmod>2019-12-04T09:29:46+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/12/11/hive-issues-with-large-yarn-containers-low-concurrency-and-utilization-high-execution-time/</loc>
    <lastmod>2019-12-11T11:22:27+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2019/12/30/spark-slow-load-into-partitioned-hive-table-on-s3-direct-writes-output-committer-algorithms/</loc>
    <lastmod>2019-12-30T14:39:26+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/02/14/hadoop-yarn-cluster-idle-time/</loc>
    <lastmod>2020-02-14T19:43:49+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/02/19/hadoop-yarn-container-virtual-memory-understanding-and-solving-container-is-running-beyond-virtual-memory-limits-errors/</loc>
    <lastmod>2020-02-20T06:37:53+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/02/26/hive-on-tez-shuffle-failed-with-too-many-fetch-failures-and-insufficient-progress/</loc>
    <lastmod>2020-02-27T11:36:01+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/04/12/flink-tuning-writes-to-s3-sink-fs-s3a-threads-max/</loc>
    <lastmod>2020-04-12T20:54:19+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/04/17/s3-multipart-upload-s3-access-log-messages/</loc>
    <lastmod>2020-04-19T19:57:00+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/04/29/flink-1-9-off-heap-memory-on-yarn-troubleshooting-container-is-running-beyond-physical-memory-limits-errors/</loc>
    <lastmod>2020-04-29T16:15:22+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/02/s3-rest-api-http-1-1-requests-for-uploading-files/</loc>
    <lastmod>2020-05-02T21:57:33+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/07/aws-ec2-vcpu-and-yarn-vcores-m4-c4-r4-instances/</loc>
    <lastmod>2020-05-07T11:59:55+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/08/yarn-negative-vcores-capacity-scheduler-with-memory-resource-type/</loc>
    <lastmod>2020-05-08T12:52:38+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/20/kinesis-client-library-kcl-2-x-consumer-load-balancing-rebalancing-taking-renewing-and-stealing-leases/</loc>
    <lastmod>2020-05-24T09:27:54+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/26/flink-s3-checkpoints-monitoring-using-s3-access-logs/</loc>
    <lastmod>2020-05-26T20:25:39+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/26/hive-table-for-s3-access-logs/</loc>
    <lastmod>2020-05-26T09:43:29+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/27/s3-multipart-upload-5-mb-part-size-limit/</loc>
    <lastmod>2020-05-27T14:30:57+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/05/29/how-parquet-files-are-written-row-groups-pages-required-memory-and-flush-operations/</loc>
    <lastmod>2022-04-25T09:22:49+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/06/04/s3-low-latency-writes-using-aggressive-retries-to-get-consistent-latency-request-timeouts/</loc>
    <lastmod>2020-06-04T10:09:21+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/06/09/flink-streaming-to-parquet-files-in-s3-massive-write-iops-on-checkpoint/</loc>
    <lastmod>2020-06-09T08:45:57+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/06/18/how-map-column-is-written-to-parquet-converting-json-to-map-to-increase-read-performance/</loc>
    <lastmod>2020-06-19T09:27:11+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2020/06/25/hadoop-yarn-monitoring-resource-consumption-by-running-applications-in-multi-cluster-environments/</loc>
    <lastmod>2020-06-25T11:59:23+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2021/01/02/flink-and-s3-entropy-injection-for-checkpoints/</loc>
    <lastmod>2021-01-02T06:55:54+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2021/01/15/parquet-1-x-file-format-footer-content/</loc>
    <lastmod>2021-01-15T09:39:09+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2021/03/07/spark-reading-parquet-predicate-pushdown-for-like-operator-equalto-startswith-and-contains-pushed-filters/</loc>
    <lastmod>2021-03-07T19:14:15+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2021/03/19/spark-reading-parquet-why-the-number-of-tasks-can-be-much-larger-than-the-number-of-row-groups/</loc>
    <lastmod>2021-03-19T10:12:56+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2022/04/20/amazon-emr-spark-ignoring-partition-filter-and-listing-all-partitions-when-reading-from-s3a/</loc>
    <lastmod>2022-04-20T14:04:48+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2022/08/26/emr-spark-much-larger-executors-are-created-than-requested/</loc>
    <lastmod>2022-08-26T11:23:55+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2022/08/29/emr-spark-initial-number-of-executors-and-spark-dynamicallocation-enabled/</loc>
    <lastmod>2022-08-29T11:03:35+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2022/08/30/spark-create-multiple-output-files-per-task-using-spark-sql-files-maxrecordsperfile/</loc>
    <lastmod>2022-08-30T09:46:16+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2022/08/30/spark-2-4-slow-performance-on-writing-into-partitions-why-sorting-involved/</loc>
    <lastmod>2022-08-30T16:20:44+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/07/19/spark-number-of-tasks-reading-large-number-of-small-parquet-files/</loc>
    <lastmod>2023-07-19T19:11:22+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/09/17/spark-limit-on-large-datasets-collectlimit-globallimit-locallimit-spark-sql-limit-scaleupfactor/</loc>
    <lastmod>2023-09-17T16:17:09+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/10/01/spark-aqe-stage-numeration-added-jobs-at-runtime-large-number-of-tasks-pending-and-skipped-stages/</loc>
    <lastmod>2023-10-01T22:26:17+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/10/06/spark-stage-restarts-partial-restarts-multiple-retry-attempts-with-different-task-sets-accepted-late-results-from-failed-stages-cost-of-restarts/</loc>
    <lastmod>2023-10-06T17:57:21+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/10/10/spark-reading-parquet-pushed-filters-substrtimestamp-1-10-like-and-stringstartswith/</loc>
    <lastmod>2023-10-11T12:08:10+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/10/15/distributed-count-distinct-how-it-works-in-spark-multiple-count-distinct-transform-to-count-with-expand-exploded-shuffle-partial-aggregations/</loc>
    <lastmod>2023-10-17T09:39:41+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/10/25/spark-reading-json-sampling/</loc>
    <lastmod>2023-10-25T20:47:35+00:00</lastmod>
  </url>
  <url>
    <loc>https://cloudsqale.com/2023/11/26/spark-order-by-implementation/</loc>
    <lastmod>2023-11-26T19:23:05+00:00</lastmod>
  </url>
</urlset>
