# COMPUTER SCIENCE AND INFORMATION TECHNOLOGY

QUESTION

Program 4.1

#include "340.h"

#ifndef H_PROG6

#define H_PROG6

// Data file names; main() passes these to get_list() as the path to read.
// The quotes must be plain ASCII double quotes for the literals to compile.
#define D1 "prog6.d1"
#define D2 "prog6.d2"
#define D3 "prog6.d3"

// Values main() passes to print_list() as its `input` argument.
// NOTE(review): presumably a per-item field width for int / float / string
// lists -- confirm against the intended print_list() behaviour.
#define INT_SZ 4
#define FLT_SZ 7
#define STR_SZ 12

// Values main() passes to print_list() as its `lineSize` argument.
// NOTE(review): presumably the number of items per output line -- confirm.
#define INT_LN 15
#define FLT_LN 9
#define STR_LN 5

// Forward declaration: insert() calls upheap() before upheap() is defined.
// For element types such as int, argument-dependent lookup does not search
// the global namespace, so without this declaration the call is not found.
template<class T, class U>
void upheap(vector<T>& vect, int index, U sortType);

/**
 * Appends `input` to `vect` and then calls upheap() on the new last position.
 *
 * @param vect      storage the value is appended to; grows by one element
 * @param input     value to add
 * @param sortType  comparator, forwarded unchanged to upheap()
 */
template<class T, class U>
void insert(vector<T>& vect, const T& input, U sortType)
{
    vect.push_back(input);

    // 0-based position of the element that was just appended.
    const int index = static_cast<int>(vect.size()) - 1;

    upheap(vect, index, sortType);
}

// Declaration only; no definition of remove() appears in this listing.
// Takes the vector by reference plus a comparator, and returns a T by value.
template <typename T, typename U>
T remove(vector<T>& vect, U sortType);

/**
 * Moves the element at `index` toward the front of `vect` for as long as
 * sortType(element, parent) is true, swapping it with its parent each step.
 *
 * Positions are 0-based: the root is at 0 and the parent of i is (i - 1) / 2.
 * With a "less" comparator the smallest element ends up at vect[0].
 *
 * @param vect      storage laid out as an implicit binary tree
 * @param index     0-based position of the element to move up
 * @param sortType  callable (child, parent) -> bool; true means the child
 *                  must be placed above the parent
 */
template<class T, class U>
void upheap(vector<T>& vect, int index, U sortType)
{
    // Stop at the root: it has no parent to compare against.
    while (index > 0)
    {
        const int parent = (index - 1) / 2;

        // Already in order relative to the parent, so nothing above changes.
        if (!sortType(vect[index], vect[parent]))
            break;

        swap(vect[index], vect[parent]);

        // Follow the element to its new position; without this the loop
        // would re-test the same pair forever.
        index = parent;
    }
}

// Declaration only; no definition of downheap() appears in this listing.
// Takes the vector by reference, an int, and a comparator; returns nothing.
// NOTE(review): the int is presumably a starting position -- confirm.
template <typename T, typename U>
void downheap(vector<T>& vect, int index, U sortType);

/**
 * Writes every element of `vect` to cout in storage order, each followed by
 * a single space. No newline is written.
 *
 * @param vect      elements to print
 * @param input     accepted but not used by this implementation
 * @param lineSize  accepted but not used by this implementation
 * @param sortType  accepted but not used by this implementation
 *
 * NOTE(review): `input` / `lineSize` are presumably meant as field width and
 * items per line -- confirm before wiring them in.
 */
template<class T, class U>
void print_list(vector<T>& vect, const int input, const int lineSize, U sortType)
{
    // Visit all size() elements; a bound of size() - 1 silently dropped the
    // last one.
    const typename vector<T>::size_type count = vect.size();

    for (typename vector<T>::size_type j = 0; j < count; ++j)
        cout << vect[j] << " ";
}

/**
 * Reads values of type T from the file at `path` with operator>>. Each value
 * is echoed to cout on its own line and then handed to insert() together
 * with `sortType`. Reading stops at the first failed extraction.
 *
 * If the file cannot be opened the function returns without touching `vect`.
 */
template<class T, class U>
void get_list(vector<T>& vect, const char* path, U sortType)
{
    T value;
    ifstream source(path);

    if (source)
    {
        while (source >> value)
        {
            cout << value << endl;
            insert(vect, value, sortType);
        }

        source.close();
    }
}

#endif

Program 4.3

#include <iostream>

int main()

{

vector<char>    v1(13);   // heap of Char

vector<int>  v2(2);   // heap of Unsigned Int

vector<int> v3(2);   // heap of Unsigned Int

vector<int> v4(4);   // heap of Unsigned Int

vector<char> v4(30);   // heap of Unsigned Char

cout << “\t\t\t*** c program 340: Program 4.2 – Output ***\n\n”;

// sort and print first list

cout << “first list – ascending order:\n\n”;

get_list(v1, D1, less<int>());

print_list(v1, INT_SZ, INT_LN, less<int>());

}

___________________________________________________________________________________

Program 4.2

#include "340.h"

#ifndef H_PROG6

#define H_PROG6

// Data file names; main() passes these to get_list() as the path to read.
// The quotes must be plain ASCII double quotes for the literals to compile.
#define D1 "prog6.d1"
#define D2 "prog6.d2"
#define D3 "prog6.d3"

// Values main() passes to print_list() as its `input` argument.
// NOTE(review): presumably a per-item field width for int / float / string
// lists -- confirm against the intended print_list() behaviour.
#define INT_SZ 4
#define FLT_SZ 7
#define STR_SZ 12

// Values main() passes to print_list() as its `lineSize` argument.
// NOTE(review): presumably the number of items per output line -- confirm.
#define INT_LN 15
#define FLT_LN 9
#define STR_LN 5

// Forward declaration: insert() calls upheap() before upheap() is defined.
// For element types such as int, argument-dependent lookup does not search
// the global namespace, so without this declaration the call is not found.
template<class T, class U>
void upheap(vector<T>& vect, int index, U sortType);

/**
 * Appends `input` to `vect` and then calls upheap() on the new last position.
 *
 * @param vect      storage the value is appended to; grows by one element
 * @param input     value to add
 * @param sortType  comparator, forwarded unchanged to upheap()
 */
template<class T, class U>
void insert(vector<T>& vect, const T& input, U sortType)
{
    vect.push_back(input);

    // upheap() indexes vect directly, so pass the 0-based position of the
    // new element; vect.size() itself is one past the end.
    const int index = static_cast<int>(vect.size()) - 1;

    upheap(vect, index, sortType);
}

// Declaration only; no definition of remove() appears in this listing.
template<class T, class U>
T remove(vector<T>&, U);

/**
 * Moves the element at `index` toward the front of `vect` for as long as
 * sortType(element, parent) is true, swapping it with its parent each step.
 *
 * Positions are 0-based: the root is at 0 and the parent of i is (i - 1) / 2.
 * With a "less" comparator the smallest element ends up at vect[0].
 *
 * @param vect      storage laid out as an implicit binary tree
 * @param index     0-based position of the element to move up
 * @param sortType  callable (child, parent) -> bool; true means the child
 *                  must be placed above the parent
 */
template<class T, class U>
void upheap(vector<T>& vect, int index, U sortType)
{
    // Stop at the root: it has no parent to compare against.
    while (index > 0)
    {
        const int parent = (index - 1) / 2;

        // The comparator alone decides the order; a hard-coded `<` here
        // would override whatever ordering the caller asked for.
        if (!sortType(vect[index], vect[parent]))
            break;

        swap(vect[index], vect[parent]);

        // Follow the element to its new position; without this the loop
        // would re-test the same pair forever.
        index = parent;
    }
}

// Declaration only; no definition of downheap() appears in this listing.
// Takes the vector by reference, an int, and a comparator; returns nothing.
// NOTE(review): the int is presumably a starting position -- confirm.
template <typename T, typename U>
void downheap(vector<T>& vect, int index, U sortType);

/**
 * Writes every element of `vect` to cout in storage order, each followed by
 * a single space. No newline is written.
 *
 * @param vect      elements to print
 * @param input     accepted but not used by this implementation
 * @param lineSize  accepted but not used by this implementation
 * @param sortType  accepted but not used by this implementation
 *
 * NOTE(review): `input` / `lineSize` are presumably meant as field width and
 * items per line -- confirm before wiring them in.
 */
template<class T, class U>
void print_list(vector<T>& vect, const int input, const int lineSize, U sortType)
{
    const int size1 = static_cast<int>(vect.size());

    // The separator must be a plain ASCII string literal; typographic quotes
    // are not valid C++ and stop the function from compiling.
    for (int j = 0; j < size1; j++)
        cout << vect[j] << " ";
}

/**
 * Loads values of type T from the file named by `path`.
 *
 * Every value successfully extracted with operator>> is first written to
 * cout followed by endl, then passed to insert() along with `sortType`.
 * The loop ends at the first extraction that fails. When the file cannot be
 * opened, the function returns immediately and `vect` is left unchanged.
 */
template<class T, class U>
void get_list(vector<T>& vect, const char* path, U sortType)
{
    T item;
    ifstream reader;

    reader.open(path);
    if (reader.fail())
        return;

    for (;;)
    {
        reader >> item;
        if (!reader)
            break;

        cout << item << endl;
        insert(vect, item, sortType);
    }

    reader.close();
}

#endif

Program 4.4

#include <iostream>

int main()

{

vector<char>    v1(13);   // heap of Char

vector<int>  v2(2);   // heap of Unsigned Int

vector<int> v3(2);   // heap of Unsigned Int

vector<int> v4(4);   // heap of Unsigned Int

vector<char> v4(30);   // heap of Unsigned Char

cout << “\t\t\t*** c program 340: Program 4.4 – Output ***\n\n”;

// sort and print first list

cout << “first list – ascending order:\n\n”;

get_list(v1, D1, less<int>());

print_list(v1, INT_SZ, INT_LN, less<int>());

get_list(v2, D2, less<int>());

print_list(v2, INT_SZ, INT_LN, less<int>());

get_list(v3, D3, less<int>());

print_list(v3, INT_SZ, INT_LN, less<int>());

}

SOLUTION

RMIT
School of Computer Science and Information Technology
COSC2406/7 – Database Systems
Assignment #1: File Organisations
Due: 11:59 pm on Friday 13 April 2012
Marks: This assignment is worth 15% of your overall mark
1 Introduction
This is an individual assignment
In this assignment, you will carry out a number of exercises to investigate the creation and
searching of heap and sorted ﬁles.
The “Database Systems” blackboard contains further announcements and a list of frequently
asked questions. You are expected to check the discussion board on a daily basis. Log in through
https://my.rmit.edu.au.
Have a look at the file /scratch/DatabaseSystems/DATA/data_2012 on yallara. It is
around 13 Mb in size, and contains variable-length records, in comma-separated-value (CSV)
format. Each record contains data about characters in an imaginary online game; as you can see,
the length of the corresponding ﬁelds in different records can vary. Your task is to investigate
access times to this ﬁle by developing several C programs. Speciﬁcally, you will investigate the
performance of disks for different storage and retrieval methods.
2 Plagiarism
All assignments will be checked with plagiarism-detection software; any student found to have
plagiarised will be subject to disciplinary action as described in the course guide. Plagiarism
includes submitting code that is not your own or submitting text that is not your own. Submitting
one comment in your code or a sentence from someone else’s report is plagiarism, and
plagiarism includes submitting work from previous years. Allowing others to copy your work
is also plagiarism. All plagiarism will be penalised; there are no exceptions and no excuses. For
further information, please see: http://www.cs.rmit.edu.au/students/integrity/. You
have been warned.
3 General Requirements
This section contains information about the general requirements that your assignment must meet.
1. You must implement your programs in C. Your programs must be well written, using
good coding style and including appropriate use of comments. Your markers will look at
your source code. Coding style will form part of the assessment of this assignment.
2. Your programs may be developed on any machine, but must compile and run on yallara.
3. Any code you submit must be able to be built using a single Makefile with the command:
> make all
1
If your marker cannot compile your programs due to the absence of a Makefile, you risk
yielding zero marks for the coding component of your assignment.
4. Paths must not be hard-coded.
5. Some coding tasks require timing. You must use the Solaris library function
gethrtime() for that purpose.
6. Diagnostic messages must be output to stderr.
7. Your assignment must be submitted using weblearn. See Section 6 for details.
8. Parts of this assignment will ask you to analyse your results, and to write about your
conclusions in a report. The report must be plain text ﬁles. They may not be word
processor documents or text files with markup (such as LaTeX or XML). In addition, text
must be wrapped to a maximum of 80 characters per line. Files that do not meet this
requirement may not be marked. Your report must be well-written. The RMIT Study
http://www.rmit.edu.au/ID=s1jjotkfdyn.
below. File names are case sensitive, i.e. if it is speciﬁed that the ﬁle name is gryphon,
then that is exactly the ﬁle name you must submit; Gryphon, GRYPHON, griffin, and
anything else but gryphon will be rejected. If you do not obey the ﬁle naming rules, you
risk yielding zero marks for the corresponding task.
10. For some tasks, you need to generate large output ﬁles. If you do not have enough space
in your own account, you can create a directory under /scratch/DatabaseSystems
on yallara. The directory that you create should have the same name as your
DatabaseSystems/jbloggs. Note that this directory is only for storing temporary
output ﬁles. You must not store source code or other ﬁles that you wish to keep
here, as the lifespan of these directories is only short-term. Please clean up your
/scratch/DatabaseSystems directories every day, i.e. delete them before you log out
of yallara, to free up disk space.
11. Important: You must run all your experiments on yallara, because the disk that hosts
the ﬁle is local to this machine.
4.1 Writing a heap ﬁle (10%)
Write a program to create a heap file that holds the records currently in the file
/scratch/DatabaseSystems/DATA/data_2012 on yallara. The source records are variable-length.
However, the heap ﬁle should hold ﬁxed-length records. Create the new records according
to the schema given in Table 1.
All attributes with Unsigned Int type must be stored in binary, e.g. if the value of ID is equal
to 70, it must be stored as 70 (in decimal) or 46 (in hexadecimal; in C: 0x46). It must not be
stored as the string “70”, occupying two bytes. Your heap ﬁle is therefore a binary ﬁle.
For simplicity, the heap ﬁle does not need a header (containing things like the number of
records in the ﬁle or a free space list). The ﬁle should be packed, i.e. there is no gap between
2
Table 1: Relation schema.
Attribute name Data type Size (bytes)
NAME Char 13
RACE Unsigned Int 2
CLASS Unsigned Int 2
ID Unsigned Int 4
GUILD Char 30
Total size: 51
records. Note that you will need to ensure that the size of each record matches the size shown
in Table 1. To ensure that records are correctly packed in memory, you may need to use the
following directive:
#pragma pack(1)
The executable name of this program must be wHeap and should be executed using the com-
mand:
> ./wHeap data_2012 heap pagesize
where data_2012 is the input file, heap is an output file to which your converted data is written,
and pagesize is an integer specifying how many records ﬁt into a “page” of your ﬁle.
Your program should write out one “page” of the ﬁle at a time (for example, with a
pagesize of 100, you would write out 100 records to disk at a time).
Your wHeap program must not output anything to stdout.
4.2 Search on a heap ﬁle (20%)
Look at the file /scratch/DatabaseSystems/DATA/search_2012. This is a text file containing
search key values; each entry is a particular ID (in the schema given above). You are to
simulate searching over a heap ﬁle, with different assumptions for the size of ﬁle pages.
Write a program to perform equality search operations on the heap ﬁle produced by your
wHeap program in Section 4.1. The executable name of this program must be sHeap and it must
be able to be executed using the command:
> ./sHeap search_2012 heap pagesize
where search_2012 is the name of the file containing the keys to be searched for; heap is the
output file of your wHeap program; and pagesize is an integer value that specifies the size of the
disk page that you are simulating.
Your program should read in the ﬁle, one “page” at a time. For example, if the pagesize
parameter is 100, your program should read in the ﬁrst 100 records from disk. These can then
be scanned, in-memory, for a match. If a match is found, print the matching record to stdout.
You should assume that ID is a primary key. If no match is found, read in the next pagesize
records of the ﬁle. The process should continue until either a matching record is found, or there
are no more records in the ﬁle to process.
If a match is found, the program must print the matching record to stdout. If no match is
found, a suitable message should be printed. In addition, the program must always output the
total time taken to do all the search operations in milliseconds to stdout. For example, if the
time taken to do the reading is 123.45 ms, the output would be:
Time: 123.45 ms
3
4.3 Writing a sorted ﬁle (15%)
Write a program to create a sorted file that stores the records currently in the file
/scratch/DatabaseSystems/DATA/data_2012 on yallara. You may modify your code from Section
4.1. Records should use the same ﬁxed-length schema given previously, and should again
be written in binary.
When inserting the records into your new ﬁle, they should be sorted on an appropriate
attribute. You will need to choose a sensible sorting algorithm, appropriate to the data that you
are dealing with. You should implement this sorting algorithm yourself.
The executable name of this program must be wSort and it must be able to be executed
using the command:
> ./wSort data_2012 sorted pagesize
where data_2012 is the input file; sorted is an output file to which your converted data is
written; and pagesize is an integer specifying the size of a page of the ﬁle (that is, the number
of records that can be stored per page).
Like wHeap, your wSort program must not output anything to stdout.
4.4 Search on a sorted ﬁle (25%)
Write a program to simulate searching over a sorted ﬁle, with different assumptions for the size
of ﬁle pages.
Write a program to perform equality search operations on the sorted ﬁle produced by your
wSort program in Section 4.3. The executable name of this program must be sSort and it must
be able to be executed using the command:
> ./sSort search_2012 sorted pagesize
where search_2012 is the name of the file containing the ID keys to be searched for; sorted
is the output file of your wSort program; and pagesize is an integer value that specifies the size
of the disk page that you are simulating.
Your program must take advantage of the assumption that sorted is a ﬁle whose structure
has been created by sorting on the ID key. Your program should read in required parts of the
ﬁle, one “page” at a time. For example, if the pagesize parameter is 100, your program should
fetch 100 records in a single read from disk. These can then be scanned, in-memory, for a
match.
If a match is found, the program must print the matching record to stdout. If no match is
found, a suitable message should be printed. In addition, the program must output the total time
taken to do all the search operations in milliseconds to stdout. For example, if the time taken
to do the reading is 123.45 ms, the output would be:
Time: 123.45 ms
5 Experiments and Analysis
In this section, you will be asked to carry out a number of experiments and to analyse your
results. Create a ﬁle called report.txt. Use this ﬁle to record your answers to the following
questions.
4
5.1 Searching with a heap ﬁle (5%)
• Put a heading “5.1: Equality search (heap ﬁle)” in your report.
• Run your sHeap program with pagesize settings of: 100; 1,000; 10,000. For each of
these pagesize settings, run your program 10 times. Create a table in your report, and
record the timing results, including the date and time of each run.
• Calculate the average and standard deviation of the running times for each pagesize, and
record them in your report.
5.2 Searching with a sorted ﬁle (5%)
• Put a heading “5.2: Equality search (sorted ﬁle)” in your report.
• Run your sSort program with pagesize settings of: 100; 1,000; 10,000. For each of
these pagesize settings, run your program 10 times. Record the timing results in a table
in your report, including the date and time of each run.
• Calculate the average and standard deviation of the running times, for each pagesize,
and record them in your report.
5.3 Comparison of approaches (5%)
tages of the heap and sorted ﬁle organisations. Do the trends change for different page
sizes? Are the results what you would have expected to see based on your theoretical
understanding of these ﬁle organisations? Why or why not? Limit your discussion to half
a page.
5.4 Theory (10 +5 = 15%)
1. Suppose that instead of equality searches, you were carrying out range searches. Would
you expect your results to change? Which of the ﬁle organisations would you prefer? As
part of your discussion, you should demonstrate your understanding of the properties of
the ﬁle organisations. Limit your discussion to half a page.
2. Now, suppose that instead of equality searches, you were inserting new records into the
ﬁle. Which of the ﬁle organisations would you prefer? As part of your discussion, you
should demonstrate your understanding of the properties of the ﬁle organisations. Limit
your discussion to half a page.
5
6 Submission
Before you submit anything, read through the assignment speciﬁcations again carefully, especially
Section 3. Check that you have followed all instructions. Also check that you have
attempted all parts of all questions.
When
The assignment is due at 11:59 pm on Friday 13 April 2012.
What
You must submit all source code, your report ﬁles, and the Makefile. Do not submit executables,
object ﬁles, or data.
How
You need to submit your source code, report ﬁles, and Makefile using weblearn. Use the
following process:
1. Put all your submission ﬁles in a directory.
2. Run gtar to compress your ﬁles using:
> gtar -zcvf files.tar.gz Makefile *.h *.c *.txt
You must check your submission by running:
> gtar -ztvf files.tar.gz
Note that the ﬁlename must be files.tar.gz
Then upload the ﬁles.tar.gz ﬁle to weblearn. The onus is on you to check that your
Your weblearn submission must have a timestamp of 11:59 pm on Friday 13 April 2012 (or
earlier). Late submissions should be submitted using the same weblearn procedure, but will be
penalised by 10% of total possible marks per day for assignments that are 1 to 5 days late.
For assignments that are more than 5 days late, a penalty of 100% will apply. See the course
guide for further information.
6

JE75

“The presented piece of writing is a good example of how an academic paper should be written. However, the text can’t be used as part of your own work and submitted to your professor – it will be considered plagiarism.

But you can order it from our service and receive complete high-quality custom paper.  Our service offers Information Technology  essay sample that was written by professional writer. If you like one, you have an opportunity to buy a similar paper. Any of the academic papers will be written from scratch, according to all customers’ specifications, expectations and highest standards.”