File Corruption Example with Append
Example of writing to file from multiple threads that corrupts the content, even when we append.
Testing with SIMPLE_APPEND - Corruption started past 16K characters (presuming about 16KB)
- write_type=[SIMPLE_APPEND] characters_per_write=[1000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[2000] corruption_rate=0.00% co-routines=[2]
- ...
- write_type=[SIMPLE_APPEND] characters_per_write=[16000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[17000] corruption_rate=26.50% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[18000] corruption_rate=42.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[19000] corruption_rate=19.50% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[20000] corruption_rate=33.00% co-routines=[2]
Testing with BUFFERED_WRITER - Corruption started past 8K characters (presuming around 8KB)
- write_type=[BUFFERED_WRITER] characters_per_write=[1000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[2000] corruption_rate=0.00% co-routines=[2]
- ...
- write_type=[BUFFERED_WRITER] characters_per_write=[8000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[9000] corruption_rate=29.50% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[10000] corruption_rate=38.50% co-routines=[2]
Reproducible snapshot
GT-Sandbox-Snapshot
Code
package com.glassthought.sandbox
import kotlinx.coroutines.*
import java.io.File
import java.io.FileWriter
import java.io.BufferedWriter
import kotlin.system.measureTimeMillis
enum class FileWriteType {
SIMPLE_APPEND,
BUFFERED_WRITER
}
fun main(args: Array<String>) = runBlocking {
println("#### Testing with SIMPLE_APPEND:")
for (i in 1..20) {
go(
TestOptions(
charsPerWrite = i * 1_000,
writeType = FileWriteType.SIMPLE_APPEND
)
).getOneLinerReport().print()
}
println("\n#### Testing with BUFFERED_WRITER:")
for (i in 1..20) {
go(
TestOptions(
charsPerWrite = i * 1_000,
writeType = FileWriteType.BUFFERED_WRITER
)
).getOneLinerReport().print()
}
}
data class TestOptions(
val numCoroutines: Int = 2,
val charsPerWrite: Int,
val iterationsPerCoroutine: Int = 100,
val writeType: FileWriteType = FileWriteType.SIMPLE_APPEND
)
data class CorruptedLine(
val lineNumber: Int,
val line: String,
val sample: String,
val charDistribution: List<Pair<Char, Int>>,
val lineLength: Int
)
interface Report {
fun print()
}
class OneLinerReport(private val result: TestResult) : Report {
override fun print() = println(toString())
override fun toString(): String = buildString {
val corruptionRateStr = "%.2f%%".format(result.corruptionRate)
return "- write_type=[${result.options.writeType}] characters_per_write=[${result.options.charsPerWrite}] corruption_rate=${corruptionRateStr} co-routines=[${result.options.numCoroutines}]"
}
}
class ConciseReport(private val result: TestResult) : Report {
override fun print() = println(toString())
override fun toString(): String = buildString {
appendLine("### CONCISE REPORT characters per write [${result.options.charsPerWrite}]")
appendLine("Test Parameters:")
appendLine(" - Write Type: ${result.options.writeType}")
appendLine(" - Coroutines: ${result.options.numCoroutines}")
appendLine(" - Characters per write: ${result.options.charsPerWrite}")
appendLine(" - Iterations per coroutine: ${result.options.iterationsPerCoroutine}")
appendLine("\nResults:")
appendLine(" - Execution time: ${result.executionTimeMs}ms")
appendLine(" - Total lines: ${result.totalLines} (expected: ${result.expectedLines})")
appendLine(" - Corrupted lines: ${result.corruptedLines.size}")
appendLine(" - Corruption rate: %.2f%%".format(result.corruptionRate))
appendLine(" - File: ${result.tmpFile.name}")
}
}
class FullReport(private val result: TestResult, private val maxSamples: Int = 3) : Report {
override fun print() = println(toString())
override fun toString(): String = buildString {
appendLine("=== FULL REPORT ===")
appendLine("Test Parameters:")
appendLine(" - Write Type: ${result.options.writeType}")
appendLine(" - Coroutines: ${result.options.numCoroutines}")
appendLine(" - Characters per write: ${result.options.charsPerWrite}")
appendLine(" - Iterations per coroutine: ${result.options.iterationsPerCoroutine}")
appendLine(" - Letters used: ${('A'..<('A' + result.options.numCoroutines)).joinToString()}")
appendLine("\nExecution:")
appendLine(" - Time: ${result.executionTimeMs}ms")
appendLine(" - File: ${result.tmpFile.absolutePath}")
appendLine("\nCorruption Analysis:")
appendLine(" - Total lines written: ${result.totalLines}")
appendLine(" - Expected lines: ${result.expectedLines}")
appendLine(" - Corrupted lines found: ${result.corruptedLines.size}")
appendLine(" - Corruption rate: %.2f%%".format(result.corruptionRate))
if (result.corruptedLines.isNotEmpty()) {
appendLine(
"\nCorrupted Line Samples (showing ${
minOf(
maxSamples,
result.corruptedLines.size
)
} of ${result.corruptedLines.size}):"
)
result.corruptedLines.take(maxSamples).forEach { corrupted ->
appendLine("\n Line ${corrupted.lineNumber}:")
appendLine(" Sample: ${corrupted.sample}")
appendLine(" Character distribution (top 5): ${corrupted.charDistribution}")
appendLine(" Line length: ${corrupted.lineLength} (expected: ${result.options.charsPerWrite})")
}
if (result.corruptedLines.size > maxSamples) {
appendLine("\n ... and ${result.corruptedLines.size - maxSamples} more corrupted lines")
}
} else {
appendLine("\n No corruption detected!")
appendLine(" Try increasing coroutines/iterations or running multiple times.")
}
appendLine("\nFile Statistics:")
appendLine(" - File size: ${result.fileSize} bytes")
appendLine(" - Expected size: ~${result.expectedSize} bytes")
appendLine(" - Size difference: ${result.fileSize - result.expectedSize} bytes")
}
}
data class TestResult(
val options: TestOptions,
val tmpFile: File,
val executionTimeMs: Long,
val totalLines: Int,
val expectedLines: Int,
val corruptedLines: List<CorruptedLine>,
val fileSize: Long,
val expectedSize: Long
) {
val corruptionRate: Double
get() = if (totalLines > 0) {
corruptedLines.size.toDouble() / totalLines * 100
} else 0.0
fun getConciseReport(): Report = ConciseReport(this)
fun getOneLinerReport(): Report = OneLinerReport(this)
fun getFullReport(maxSamples: Int = 3): Report = FullReport(this, maxSamples)
fun cleanUp() {
tmpFile.delete()
}
}
suspend fun go(options: TestOptions): TestResult {
val tmpFile = File.createTempFile("corruption_test_", ".txt")
val letters = ('A'..'Z').toList()
val selectedLetters = letters.take(options.numCoroutines)
val executionTime = measureTimeMillis {
coroutineScope {
val jobs = selectedLetters.map { letter ->
launch(Dispatchers.IO) {
repeat(options.iterationsPerCoroutine) {
when (options.writeType) {
FileWriteType.SIMPLE_APPEND -> {
val stringToAppend = letter.toString().repeat(options.charsPerWrite) + "\n"
tmpFile.appendText(stringToAppend)
}
FileWriteType.BUFFERED_WRITER -> {
BufferedWriter(FileWriter(tmpFile, true)).use { writer ->
val line = letter.toString().repeat(options.charsPerWrite)
writer.write(line)
writer.newLine()
}
}
}
}
}
}
jobs.joinAll()
}
}
// Gather corruption data
var totalLines = 0
val corruptedLinesList = mutableListOf<CorruptedLine>()
tmpFile.bufferedReader().use { reader ->
reader.lineSequence().forEachIndexed { lineNumber, line ->
totalLines++
if (line.isNotEmpty()) {
val firstChar = line[0]
val isCorrupted = !line.all { it == firstChar }
if (isCorrupted) {
val sample = if (line.length > 100) {
line.substring(0, 100) + "..."
} else {
line
}
val charCounts = line.groupBy { it }
.mapValues { it.value.size }
.toList()
.sortedByDescending { it.second }
.take(5)
corruptedLinesList.add(
CorruptedLine(
lineNumber = lineNumber + 1,
line = line,
sample = sample,
charDistribution = charCounts,
lineLength = line.length
)
)
}
}
}
}
val expectedLines = options.numCoroutines * options.iterationsPerCoroutine
val fileSize = tmpFile.length()
val expectedSize = options.numCoroutines.toLong() * options.iterationsPerCoroutine * (options.charsPerWrite + 1)
return TestResult(
options = options,
tmpFile = tmpFile,
executionTimeMs = executionTime,
totalLines = totalLines,
expectedLines = expectedLines,
corruptedLines = corruptedLinesList,
fileSize = fileSize,
expectedSize = expectedSize
)
}
Command to reproduce:
gt.sandbox.checkout.commit b54c3189a9b0ede67e37 \
&& cd "${GT_SANDBOX_REPO}" \
&& cmd.run.announce "./gradlew run --quiet"
Recorded output of command:
Picked up JAVA_TOOL_OPTIONS: -Dkotlinx.coroutines.debug
Picked up JAVA_TOOL_OPTIONS: -Dkotlinx.coroutines.debug
#### Testing with SIMPLE_APPEND:
- write_type=[SIMPLE_APPEND] characters_per_write=[1000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[2000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[3000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[4000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[5000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[6000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[7000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[8000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[9000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[10000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[11000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[12000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[13000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[14000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[15000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[16000] corruption_rate=0.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[17000] corruption_rate=26.50% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[18000] corruption_rate=42.00% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[19000] corruption_rate=19.50% co-routines=[2]
- write_type=[SIMPLE_APPEND] characters_per_write=[20000] corruption_rate=33.00% co-routines=[2]
#### Testing with BUFFERED_WRITER:
- write_type=[BUFFERED_WRITER] characters_per_write=[1000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[2000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[3000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[4000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[5000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[6000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[7000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[8000] corruption_rate=0.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[9000] corruption_rate=29.50% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[10000] corruption_rate=38.50% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[11000] corruption_rate=29.50% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[12000] corruption_rate=34.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[13000] corruption_rate=38.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[14000] corruption_rate=32.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[15000] corruption_rate=28.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[16000] corruption_rate=10.50% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[17000] corruption_rate=49.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[18000] corruption_rate=49.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[19000] corruption_rate=49.00% co-routines=[2]
- write_type=[BUFFERED_WRITER] characters_per_write=[20000] corruption_rate=50.00% co-routines=[2]