Commit 5b855ec0 authored by abdullh.alsoleman's avatar abdullh.alsoleman

Count-URL-access-frequency

parent 7b6894e2
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### IntelliJ IDEA ###
.idea/modules.xml
.idea/jarRepositories.xml
.idea/compiler.xml
.idea/libraries/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store
\ No newline at end of file
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ArtifactsWorkspaceSettings">
<artifacts-to-build>
<artifact name="count-of-URL-access-frequency:jar" />
</artifacts-to-build>
</component>
<component name="AutoImportSettings">
<option name="autoReloadType" value="SELECTIVE" />
</component>
<component name="ChangeListManager">
<list default="true" id="7328fa02-97f7-4ade-a84b-5990c24abe66" name="Changes" comment="">
<change afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/artifacts/count_of_URL_access_frequency_jar.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/encodings.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/vcs.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/pom.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/src/main/java/Count/URLAccessFrequency.java" afterDir="false" />
<change afterPath="$PROJECT_DIR$/src/main/java/Count/URLMapper.java" afterDir="false" />
<change afterPath="$PROJECT_DIR$/src/main/java/Count/URLReducer.java" afterDir="false" />
<change afterPath="$PROJECT_DIR$/src/main/resources/META-INF/MANIFEST.MF" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="MarkdownSettingsMigration">
<option name="stateVersion" value="1" />
</component>
<component name="ProjectId" id="2fsPaUy1NpKm4ojqDuB5UFP5wjY" />
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent"><![CDATA[{
"keyToString": {
"RunOnceActivity.OpenProjectViewOnStart": "true",
"RunOnceActivity.ShowReadmeOnStart": "true",
"SHARE_PROJECT_CONFIGURATION_FILES": "true",
"last_opened_file_path": "C:/Users/Abdullah/Downloads/count-of-URL-access-frequency/src/main/java",
"project.structure.last.edited": "Artifacts",
"project.structure.proportion": "0.0",
"project.structure.side.proportion": "0.2"
}
}]]></component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\Abdullah\Downloads\count-of-URL-access-frequency\src\main\java" />
</key>
</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="7328fa02-97f7-4ade-a84b-5990c24abe66" name="Changes" comment="" />
<created>1714586879054</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1714586879054</updated>
</task>
<servers />
</component>
<component name="Vcs.Log.Tabs.Properties">
<option name="TAB_STATES">
<map>
<entry key="MAIN">
<value>
<State />
</value>
</entry>
</map>
</option>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>count-of-URL-access-frequency</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package Count;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class URLAccessFrequency {
public static void main(String[] args) throws Exception {
// Create a new Configuration object to hold Hadoop configuration settings
Configuration conf = new Configuration();
// Parse the command line arguments to get input and output file paths
String[] files = new GenericOptionsParser(conf, args).getRemainingArgs();
Path input = new Path(files[0]); // Input file path
Path output = new Path(files[1]); // Output file path
// Create a new MapReduce job with the configuration and a job name
Job job = new Job(conf, "URL_Access_Frequency");
// Set the main class for the job JAR file
job.setJarByClass(URLAccessFrequency.class);
// Set the Mapper and Reducer classes for the job
job.setMapperClass(URLMapper.class); // Mapper class
job.setCombinerClass(URLReducer.class); // Optional combiner class for optimization
job.setReducerClass(URLReducer.class); // Reducer class
// Set the output key and value classes for the job
job.setOutputKeyClass(Text.class); // Output key class
job.setOutputValueClass(IntWritable.class); // Output value class
// Set the input and output file paths for the job
FileInputFormat.addInputPath(job, input); // Input file path
FileOutputFormat.setOutputPath(job, output); // Output file path
// Execute the job and wait for completion
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
package Count;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class URLMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text url = new Text();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Split the input value by space
String[] fields = value.toString().split(" ");
if (fields.length >= 1) {
// Assuming URL is the first field
url.set(fields[0]);
// Emit (url, 1) as key-value pair
context.write(url, one);
}
}
}
package Count;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
public class URLReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
private DoubleWritable result = new DoubleWritable();
// Variable to store the total count of URL accesses
private long totalCount = 0;
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
// Iterate through the values (counts) for the current URL key
for (IntWritable val : values) {
// Sum up the counts
sum += val.get();
// Increment the total count
totalCount += val.get();
}
// Calculate the percentage frequency of the URL
double frequency = ((double) sum / totalCount) * 100;
result.set(frequency); // Set the result as the calculated frequency
context.write(key, result); // Write the URL and its frequency to the output
}
}
Manifest-Version: 1.0
Main-Class: Count.URLAccessFrequency
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment