From d98f46ce647846b0aa30b2e16a30fd4e152a1bf5 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino
Date: Thu, 10 Jul 2025 22:55:07 +0200
Subject: Add new code

Signed-off-by: Carlos Maiolino
---
 PGU/CHAP9/count-chars.s   |  47 +++++++
 PGU/CHAP9/error.s         |  46 +++++++
 PGU/CHAP9/linux.s         |  16 +++
 PGU/CHAP9/memalloc.s      | 303 ++++++++++++++++++++++++++++++++++++++++++++++
 PGU/CHAP9/read-records.s  |  86 +++++++++++++
 PGU/CHAP9/read_write.s    |  62 ++++++++++
 PGU/CHAP9/record-def.s    |   7 ++
 PGU/CHAP9/write-newline.s |  29 +++++
 8 files changed, 596 insertions(+)
 create mode 100644 PGU/CHAP9/count-chars.s
 create mode 100644 PGU/CHAP9/error.s
 create mode 100644 PGU/CHAP9/linux.s
 create mode 100644 PGU/CHAP9/memalloc.s
 create mode 100644 PGU/CHAP9/read-records.s
 create mode 100644 PGU/CHAP9/read_write.s
 create mode 100644 PGU/CHAP9/record-def.s
 create mode 100644 PGU/CHAP9/write-newline.s

(limited to 'PGU/CHAP9')

diff --git a/PGU/CHAP9/count-chars.s b/PGU/CHAP9/count-chars.s
new file mode 100644
index 0000000..7e12791
--- /dev/null
+++ b/PGU/CHAP9/count-chars.s
@@ -0,0 +1,47 @@
+# Count the characters in a string (until a null byte is reached)
+# It's supposed to behave similarly to strlen()
+#
+# Returns the count in %rax
+#
+# - %rcx: Char count
+# - %rdx: current Char address
+# - %al: current char
+#
+
+.type count_chars, @function
+.globl count_chars
+
+# We receive the string address on the stack.
+# After the prologue 8(%rbp) holds the return address, so the argument is at 16(%rbp)
+.equ ST_STRING_ADDRESS, 16
+
+count_chars:
+    pushq %rbp
+    movq %rsp, %rbp
+
+    # Initialize counter
+    movq $0, %rcx
+
+    # Start address of string:
+    movq ST_STRING_ADDRESS(%rbp), %rdx
+
+count_loop_begin:
+
+    # Grab char
+    movb (%rdx), %al
+
+    cmpb $0, %al                # A null byte ends the string
+    je count_loop_end
+
+    # We are not done yet: count this char and move on to the next one
+    incq %rcx
+    incq %rdx
+    jmp count_loop_begin
+
+count_loop_end:
+
+    movq %rcx, %rax             # Return the count (the null byte is not counted)
+
+    movq %rbp, %rsp
+    popq %rbp
+    ret
diff --git a/PGU/CHAP9/error.s b/PGU/CHAP9/error.s
new file mode 100644
index 0000000..6e933ed
--- /dev/null
+++ b/PGU/CHAP9/error.s
@@ -0,0 +1,46 @@
+# error_exit - write an error code string and an error message to STDERR,
+# followed by a newline, then exit the program with status 1 (never returns).
+# Both arguments are addresses of null-terminated strings passed on the stack.
+
+.include "linux.s"
+.equ ST_ERROR_CODE, 16
+.equ ST_ERROR_MSG, 24
+
+.globl error_exit
+.type error_exit, @function
+
+error_exit:
+    pushq %rbp
+    movq %rsp, %rbp
+
+    # Write error code (its length comes from count_chars)
+    movq ST_ERROR_CODE(%rbp), %rcx
+    pushq %rcx
+    call count_chars
+    popq %rcx                   # Get the string address back (count_chars overwrites %rcx)
+
+    movq %rax, %rdx             # How many bytes to write
+    movq %rcx, %rsi             # Buffer to write from
+    movq $STDERR, %rdi          # FD to write to
+    movq $SYS_WRITE, %rax
+    syscall
+
+    # Write error message
+    movq ST_ERROR_MSG(%rbp), %rcx
+    pushq %rcx
+    call count_chars
+    popq %rcx                   # Get the string address back (count_chars overwrites %rcx)
+
+    movq %rax, %rdx # How many bytes to write
+    movq %rcx, %rsi # Buffer to write from
+    movq $STDERR, %rdi # FD to write to
+    movq $SYS_WRITE, %rax # write() syscall number
+    syscall
+
+    pushq $STDERR               # Argument is left on the stack: we exit right after
+    call write_newline
+
+    # Exit program with status 1
+    movq $SYS_EXIT, %rax
+    movq $1, %rdi
+    syscall
diff --git a/PGU/CHAP9/linux.s b/PGU/CHAP9/linux.s
new file mode 100644
index 0000000..9ab8243
--- /dev/null
+++ b/PGU/CHAP9/linux.s
@@ -0,0 +1,16 @@
+# Syscall numbers (x86_64), loaded into %rax before the syscall instruction
+
+.equ SYS_EXIT, 60
+.equ SYS_READ, 0
+.equ SYS_WRITE, 1
+.equ SYS_OPEN, 2
+.equ SYS_CLOSE, 3
+.equ SYS_BRK, 12
+
+# Default File Descriptors
+.equ STDIN, 0
+.equ STDOUT, 1
+.equ STDERR, 2
+
+# Common Status Codes
+.equ END_OF_FILE, 0
diff --git a/PGU/CHAP9/memalloc.s b/PGU/CHAP9/memalloc.s
new file mode 100644
index 0000000..6f354de
--- /dev/null
+++ b/PGU/CHAP9/memalloc.s
@@ -0,0 +1,303 @@
+
+# Alloc and Dealloc memory as requested
+#
+# Programs using these routines will ask
+# for a certain size of memory. We actually
+# use more than that size for metadata,
+# but we put it at the beginning, before the
+# pointer we hand back. We add a size field and
+# an AVAILABLE/UNAVAILABLE marker.
+#
+# The whole memory slot looks like this:
+#
+####################################################
+# Available marker | Size | Actual memory location #
+####################################################
+#                           ^-- returned pointer points
+#                               here
+#
+# The calling program won't see our metadata.
+
+### GLOBAL VARS ###
+.section .data
+
+############################### DEBUG INFO ################################
+
+# Originally, heap_begin and current_break were declared with .long, which
+# reserves only 4 bytes for each variable. This was causing the 8-byte movq
+# stores in allocate_init to overlap the variables when writing to them:
+
+# The following instructions were the problem:
+#   movq %rax, current_break
+#       allocated at 0x402011
+#   movq %rax, heap_begin
+#       allocated at 0x40200d
+#
+# Static allocation grows up in memory... But, when writing %rax to heap_begin,
+# due to the variable sizes being 4 bytes, the instruction was corrupting the
+# current_break variable, causing the program to crash later in the allocation
+# loop, because the heap_begin store consequently caused current_break to be
+# zero, causing a NULL ptr dereference (or simply a SEGFAULT):
+
+# => 0x000000000040114e <+5>: mov 0x8(%rax),%rdx
+# Program received signal SIGSEGV, Segmentation fault.
+
+# allocate_init() disassemble:
+#   0x0000000000401105 <+0>: push %rbp
+#   0x0000000000401106 <+1>: mov %rsp,%rbp
+#   0x0000000000401109 <+4>: mov $0xc,%rax
+#   0x0000000000401110 <+11>: mov $0x0,%rdi
+#   0x0000000000401117 <+18>: syscall
+#   0x0000000000401119 <+20>: inc %rax
+#   0x000000000040111c <+23>: mov %rax,0x402011
+#   0x0000000000401124 <+31>: mov %rax,0x40200d
+#=> 0x000000000040112c <+39>: mov %rbp,%rsp
+#   0x000000000040112f <+42>: pop %rbp
+#   0x0000000000401130 <+43>: ret
+#
+# (gdb) info register rax
+# rax 0x403001 4206593
+# (gdb) p /x *0x40200d
+# $5 = 0x403001
+# (gdb) p /x *0x402011
+# $6 = 0x0 <-- Here, the value stored in current_break got zeroed after
+#              heap_begin has been changed
+
+############################# END OF DEBUG INFO #############################
+
+# Points to the beginning of the memory we are managing (8 bytes, see above)
+heap_begin:
+    .quad 0
+
+# Points to one location past the memory we are managing (8 bytes, see above)
+current_break:
+    .quad 0
+
+### HEADER STRUCTURE INFORMATION ###
+
+# To make things simpler, we use one 8-byte word for
+# each field in the header
+
+.equ HEADER_SIZE, 16 # size of space for memory region header
+.equ HDR_AVAIL_OFFSET, 0 # Location of the 'available' flag in the header
+.equ HDR_SIZE_OFFSET, 8 # Location of the size field in the header
+
+
+### CONSTANTS ###
+.equ UNAVAILABLE, 0
+.equ AVAILABLE, 1
+
+.equ SYS_BRK, 12 # syscall number for brk() syscall in x86_64
+
+
+.section .text
+
+### FUNCTIONS ###
+
+## allocate_init ##
+#
+# Call this function to initialize the allocator by setting heap_begin and
+# current_break. No parameters and no return value
+
+.globl allocate_init
+.type allocate_init, @function
+
+allocate_init:
+    pushq %rbp                  # standard function stuff
+    movq %rsp, %rbp
+
+    # Calling the brk() syscall with 0 in %rdi returns the current break in
+    # %rax
+    movq $SYS_BRK, %rax
+    movq $0, %rdi
+    syscall
+
+    incq %rax                   # brk(0) returns the current break in %rax, we want the
+                                # value after that
+
+    movq %rax, current_break    # Store the current break (actually the address
+                                # after that)
+
+    movq %rax, heap_begin       # Our heap starts where the break is now. First
+                                # address of uninitialized memory. This will
+                                # cause the allocate function to get more memory
+                                # from Linux the first time it is run.
+
+    movq %rbp, %rsp             # Exit the function
+    popq %rbp
+    ret
+
+## END of allocate_init ##
+
+
+## allocate ##
+#
+# Grab a section of memory.
+# - Checks to see if there are any free blocks
+# - If not, asks Linux for more memory for the
+#   heap through brk() syscall
+#
+# - Receives one parameter, the size of the memory block
+#   we want to allocate
+#
+# - Returns the address of the allocated memory in %rax, or 0
+#   if there is no memory available on the system
+#
+# ### Process
+#
+# %rcx - holds the size of requested memory (first/only parameter)
+# %rax - current memory region being examined
+# %rbx - Memory address past the end of the heap (not preserved for the caller)
+# %rdx - size of the current memory region
+#
+# We scan through each memory region starting with heap_begin. We look at the
+# size of each one and if it has been allocated. If it's big enough for the
+# requested size, and it's available, we grab that one. If we do not find a
+# region large enough, we ask Linux for more memory, which in turn moves the
+# current_break up.
+
+.globl allocate
+.type allocate, @function
+.equ ST_MEM_SIZE, 16 # Stack position of the memory size to allocate
+
+allocate:
+    pushq %rbp
+    movq %rsp, %rbp
+
+    movq ST_MEM_SIZE(%rbp), %rcx    # %rcx now holds the memory size we are
+                                    # looking for (first/only parameter)
+
+    movq heap_begin, %rax           # %rax will hold the current search location
+    movq current_break, %rbx        # %rbx holds the end of the managed heap
+
+  alloc_loop_begin:                 # Scan memory regions
+
+    cmpq %rbx, %rax                 # heap needs more memory if these are
+                                    # equal. %rax holds the header address of
+                                    # the next memory region at each iteration
+                                    # until it hits the current_break.
+
+    je move_break
+
+    # Retrieve the size of this mem slot
+    movq HDR_SIZE_OFFSET(%rax), %rdx
+
+    cmpq $UNAVAILABLE, HDR_AVAIL_OFFSET(%rax)   # If the space is
+                                                # unavailable, i.e.
+                                                # already in use.. Go
+    je next_location                            # to the next one.
+
+    cmpq %rdx, %rcx                 # If the slot is available, check if it's
+    jbe allocate_here               # big enough. Sizes are unsigned: use jbe.
+
+  next_location:
+    addq $HEADER_SIZE, %rax         # Total size of the memory region is the
+                                    # sum of the size requested (currently
+                                    # at %rdx), plus the 16 bytes for the
+    addq %rdx, %rax                 # header.
+
+    jmp alloc_loop_begin            # Go look at the next location
+
+  allocate_here:                    # If we've made it here, that means that
+                                    # the region header of the region to
+                                    # allocate is in %rax
+
+    # Mark space as unavailable
+    movq $UNAVAILABLE, HDR_AVAIL_OFFSET(%rax)
+    addq $HEADER_SIZE, %rax         # move %rax past the header, so it
+                                    # points to the usable memory. Such
+                                    # address is returned to the user.
+
+    # Normal function return
+    movq %rbp, %rsp
+    popq %rbp
+    ret
+
+  move_break:   # No free region was large enough, we need to
+                # get more from Linux
+                # %rbx holds the current endpoint of the data, and
+                # %rcx the requested size.
+
+    addq $HEADER_SIZE, %rbx         # We need to increase %rbx to where we _want_
+                                    # memory to end. So we account for the header
+    addq %rcx, %rbx                 # and the user's requested size
+    jc error                        # The address wrapped around: size is too large
+
+    # Ask Linux for more memory
+    # We'll need the values in these registers, so save them...
+    pushq %rax
+    pushq %rbx
+    pushq %rcx                      # (syscall overwrites %rcx)
+
+    movq $SYS_BRK, %rax             # Set new break by calling brk()
+    movq %rbx, %rdi                 # x86_64 uses %rdi for first parameter
+    syscall
+
+    # The raw Linux brk() syscall never returns 0 on failure: it leaves the
+    # break untouched and returns its current value. The request (still in
+    # %rbx) succeeded only if the returned break is at least that address.
+
+    cmpq %rbx, %rax                 # Check for errors: new break < requested?
+    jb error
+
+    # Restore our registers
+    popq %rcx
+    popq %rbx
+    popq %rax
+
+    # Set this memory as unavailable since we are about to give it away
+    movq $UNAVAILABLE, HDR_AVAIL_OFFSET(%rax)
+    movq %rcx, HDR_SIZE_OFFSET(%rax)    # Set memory size
+
+    addq $HEADER_SIZE, %rax         # Set %rax to the address being
+                                    # returned to the user (user
+                                    # doesn't know anything about the headers)
+
+    movq %rbx, current_break        # Save the current break. In
+                                    # reality it may be larger due to
+                                    # rounding, but we don't care
+                                    # about memory footprint here
+
+    movq %rbp, %rsp
+    popq %rbp
+    ret
+
+  error:
+    movq $0, %rax                   # Return 0 on error
+    movq %rbp, %rsp                 # Also drops any registers still pushed above
+    popq %rbp
+    ret
+##### END OF allocate ######
+
+## deallocate ##
+#
+# Give back a region of memory to the pool after user is done
+# with it
+#
+# Parameter: Address of the memory to be freed
+# No return value
+#
+# The memory address returned to the user starts right after its 2-word header,
+# all we need to do here is mark the memory slot as free (available).
+# For now we don't care about moving the break back.
+.globl deallocate
+.type deallocate, @function
+
+# Stack position of the memory region to be freed (function's parameter)
+.equ ST_MEMORY_SEG, 8 # We are not saving %rbp here so we just need to skip ret
+                      # address
+
+deallocate:
+    # Function is too simple, no need for a stack frame. The pointer is not validated.
+
+    # Get the address of the memory to free (Normally this is 16(%rbp), but
+    # since we didn't push %rbp or move %rsp to %rbp, we can just do 8(%rsp)
+    movq ST_MEMORY_SEG(%rsp), %rax
+
+    # Get the pointer to the beginning of the region (i.e. to the header)
+    subq $HEADER_SIZE, %rax
+
+    # Mark it as available
+    movq $AVAILABLE, HDR_AVAIL_OFFSET(%rax)
+    ret
+##### END OF deallocate #####
diff --git a/PGU/CHAP9/read-records.s b/PGU/CHAP9/read-records.s
new file mode 100644
index 0000000..01384a0
--- /dev/null
+++ b/PGU/CHAP9/read-records.s
@@ -0,0 +1,86 @@
+# Read records previously written in test.dat, by write-records software
+
+.include "linux.s"
+.include "record-def.s"
+
+.section .data
+  filename:
+    .ascii "test.dat\0"
+  record_buffer_ptr:
+    .quad 0                     # 8 bytes: holds a 64-bit pointer stored with movq
+
+.section .text
+
+.globl _start
+
+_start:
+    # Stack locations for INPUT and OUTPUT FDs
+    .equ ST_INPUT_DESCRIPTOR, -8
+    .equ ST_OUTPUT_DESCRIPTOR, -16
+
+    movq %rsp, %rbp
+    subq $16, %rsp              # Save space in the stack for FDs
+
+    call allocate_init          # Initialize our memory
+
+    # allocate buffer
+    pushq $RECORD_SIZE
+    call allocate
+    addq $8, %rsp               # Cleanup stack
+    movq %rax, record_buffer_ptr
+    cmpq $0, %rax               # allocate returns 0 when it has no memory:
+    je exit_program             # nothing to read into and nothing to free
+
+    # Open data file
+    movq $SYS_OPEN, %rax
+    movq $filename, %rdi
+    movq $0, %rsi               # Open for read only
+    movq $0666, %rdx
+    syscall
+
+    # Save FD (a failed open is caught by the read result check below)
+    movq %rax, ST_INPUT_DESCRIPTOR(%rbp)
+
+    # STDOUT is always 1, but keeping it in one slot makes the output easy to redirect
+    movq $STDOUT, ST_OUTPUT_DESCRIPTOR(%rbp)
+
+record_read_loop:
+    pushq ST_INPUT_DESCRIPTOR(%rbp)
+    pushq record_buffer_ptr
+    call read_record
+    addq $16, %rsp              # Cleanup stack
+
+    # Every record is RECORD_SIZE bytes: any other result means EOF or an error
+    cmpq $RECORD_SIZE, %rax
+    jne finished_reading
+
+    # We are ok, so print the first name in the record
+    movq record_buffer_ptr, %rax
+    addq $RECORD_FIRSTNAME, %rax    # Offset of the first name in the buffer
+    pushq %rax
+    call count_chars
+    addq $8, %rsp               # Cleanup stack
+
+    # Write name to OUTPUT
+    movq %rax, %rdx             # Length of the name, used as argument to write()
+
+    movq record_buffer_ptr, %rsi
+    addq $RECORD_FIRSTNAME, %rsi
+    movq ST_OUTPUT_DESCRIPTOR(%rbp), %rdi
+    movq $SYS_WRITE, %rax
+    syscall
+
+    pushq ST_OUTPUT_DESCRIPTOR(%rbp)
+    call write_newline
+    addq $8, %rsp
+
+    jmp record_read_loop
+
+finished_reading:
+    # Free buffer (no stack cleanup needed: we exit right after)
+    pushq record_buffer_ptr
+    call deallocate
+exit_program:
+    movq $SYS_EXIT, %rax
+    movq $0, %rdi
+    syscall
diff --git a/PGU/CHAP9/read_write.s b/PGU/CHAP9/read_write.s
new file mode 100644
index 0000000..d574013
--- /dev/null
+++ b/PGU/CHAP9/read_write.s
@@ -0,0 +1,62 @@
+.include "record-def.s"
+.include "linux.s"
+
+# Read function
+# Reads a record from the file descriptor
+# and writes it into the buffer passed
+
+# STACK VARS - Used for both read and write functions. They don't share the
+#              location, but the arguments are passed in the same position
+#              for both, so, no need to create different location vars.
+
+.equ ST_BUFFER, 16 # Return address is at 8(%rbp), first argument right above it
+.equ ST_FILEDES, 24
+
+.section .text
+
+.globl read_record
+.type read_record, @function
+
+read_record:
+    pushq %rbp
+    movq %rsp, %rbp
+
+    # READ A RECORD: read(fd, buffer, RECORD_SIZE)
+    pushq %rdi                  # Keep the caller's %rdi, restored below
+    movq ST_FILEDES(%rbp), %rdi
+    movq ST_BUFFER(%rbp), %rsi
+    movq $RECORD_SIZE, %rdx
+    movq $SYS_READ, %rax
+    syscall
+
+    # NOTE: %rax has the return value, which we will give back
+    #       to our caller
+
+    popq %rdi
+
+    movq %rbp, %rsp
+    popq %rbp
+    ret
+
+.globl write_record
+.type write_record, @function
+
+write_record:
+    pushq %rbp
+    movq %rsp, %rbp
+
+    # WRITE A RECORD: write(fd, buffer, RECORD_SIZE)
+    pushq %rdi                  # Keep the caller's %rdi, restored below
+    movq ST_FILEDES(%rbp), %rdi
+    movq ST_BUFFER(%rbp), %rsi
+    movq $RECORD_SIZE, %rdx
+    movq $SYS_WRITE, %rax
+    syscall
+
+    # NOTE: %rax has the return value, which we will give back
+    #       to our caller
+    popq %rdi
+
+    movq %rbp, %rsp
+    popq %rbp
+    ret
diff --git a/PGU/CHAP9/record-def.s b/PGU/CHAP9/record-def.s
new file mode 100644
index 0000000..9e5274d
--- /dev/null
+++ b/PGU/CHAP9/record-def.s
@@ -0,0 +1,7 @@
+# Define byte offsets of each field within a record
+
+.equ RECORD_FIRSTNAME, 0
+.equ RECORD_LASTNAME, 40
+.equ RECORD_ADDRESS, 80
+.equ RECORD_AGE, 320 # Use 8 bytes for age, because it's
+.equ RECORD_SIZE, 328 # simpler to deal with whole words
diff --git a/PGU/CHAP9/write-newline.s b/PGU/CHAP9/write-newline.s
new file mode 100644
index 0000000..e916379
--- /dev/null
+++ b/PGU/CHAP9/write-newline.s
@@ -0,0 +1,29 @@
+# Write a single newline (\n) to the file descriptor passed on the stack
+
+.include "linux.s"
+.type write_newline, @function
+.globl write_newline
+
+.section .data
+
+newline:
+    .ascii "\n"
+
+.section .text
+    .equ ST_FILEDES, 16         # First (only) argument: the FD to write to
+
+write_newline:
+    pushq %rbp
+    movq %rsp, %rbp
+
+    movq $SYS_WRITE, %rax
+    movq ST_FILEDES(%rbp), %rdi
+    movq $newline, %rsi
+    movq $1, %rdx               # Exactly one byte
+    syscall
+
+    movq %rbp, %rsp             # write()'s result is left in %rax
+    popq %rbp
+    ret
+
+
-- 
cgit v1.2.3