Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
eabdullin committed Aug 3, 2015
2 parents e44bdfb + 12ff2f8 commit cac21bc
Show file tree
Hide file tree
Showing 13 changed files with 24,250 additions and 0 deletions.
21 changes: 21 additions & 0 deletions README.txt
@@ -0,0 +1,21 @@
Tools for computing distributed representation of words
------------------------------------------------------

We provide an implementation of the Continuous Bag-of-Words (CBOW) and the Skip-gram model (SG), as well as several demo scripts.

Given a text corpus, the word2vec tool learns a vector for every word in the vocabulary using the Continuous
Bag-of-Words or the Skip-Gram neural network architectures. The user should specify the following:
- desired vector dimensionality
- the size of the context window for either the Skip-Gram or the Continuous Bag-of-Words model
- training algorithm: hierarchical softmax and / or negative sampling
- threshold for downsampling the frequent words
- number of threads to use
- the format of the output word vector file (text or binary)

Usually, the other hyper-parameters such as the learning rate do not need to be tuned for different training sets.

The script demo-word.sh downloads a small (100MB) text corpus from the web, and trains a small word vector model. After the training
is finished, the user can interactively explore the similarity of the words.

More information about the scripts is provided at https://code.google.com/p/word2vec/

44 changes: 44 additions & 0 deletions Word2Vec.Net.sln
@@ -0,0 +1,44 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.31101.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Word2Vec.Net", "Word2Vec.Net\Word2Vec.Net.csproj", "{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Work2VecConsoleApp", "Work2VecConsoleApp\Work2VecConsoleApp.csproj", "{7724C534-6B39-4DA6-92CD-DA2155EE944C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|Mixed Platforms = Debug|Mixed Platforms
Debug|Win32 = Debug|Win32
Release|Any CPU = Release|Any CPU
Release|Mixed Platforms = Release|Mixed Platforms
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Debug|Win32.ActiveCfg = Debug|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Release|Any CPU.Build.0 = Release|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}.Release|Win32.ActiveCfg = Release|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Debug|Win32.ActiveCfg = Debug|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Release|Any CPU.Build.0 = Release|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{7724C534-6B39-4DA6-92CD-DA2155EE944C}.Release|Win32.ActiveCfg = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
36 changes: 36 additions & 0 deletions Word2Vec.Net/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
// Only the title, product and copyright are filled in here; the description,
// configuration, company, trademark and culture are intentionally left empty.
[assembly: AssemblyTitle("Word2Vec.Net")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Word2Vec.Net")]
[assembly: AssemblyCopyright("Copyright © 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("b2bcc46d-a28b-40a4-a873-f0b1ffe65181")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// Both the assembly version and the file version are pinned to 1.0.0.0 here,
// so they do not change from build to build unless edited by hand.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
25 changes: 25 additions & 0 deletions Word2Vec.Net/VocubWord.cs
@@ -0,0 +1,25 @@
using System.Collections.Generic;

namespace Word2Vec.Net
{
    /// <summary>
    /// A single vocabulary entry: a word together with its count and the
    /// per-word encoding data attached to it.
    /// </summary>
    internal class VocubWord
    {
        /// <summary>
        /// Count associated with the word. <see cref="VocubComparer"/> orders
        /// entries by this value (the word count used for sorting).
        /// </summary>
        public long Cn { get; set; }

        /// <summary>The word text.</summary>
        public string Word { get; set; }

        /// <summary>
        /// Code symbols for the word.
        /// NOTE(review): presumably the word's Huffman code used by hierarchical
        /// softmax; confirm against the training code that fills it in.
        /// </summary>
        public char[] Code { get; set; }

        /// <summary>
        /// NOTE(review): presumably the number of meaningful entries in
        /// <see cref="Code"/>; stored as a <c>char</c>. Confirm against the caller.
        /// </summary>
        public char CodeLen { get; set; }

        /// <summary>
        /// NOTE(review): presumably the indices of the tree nodes on the word's
        /// path; confirm against the training code that fills it in.
        /// </summary>
        public int[] Point { get; set; }
    }

    /// <summary>
    /// Used later for sorting by word counts: orders <see cref="VocubWord"/>
    /// entries by descending <see cref="VocubWord.Cn"/>, with <c>null</c>
    /// entries placed after all non-null entries.
    /// </summary>
    internal class VocubComparer : IComparer<VocubWord>
    {
        /// <summary>
        /// Compares two vocabulary entries for a descending-by-count ordering.
        /// </summary>
        /// <param name="x">First entry; may be <c>null</c>.</param>
        /// <param name="y">Second entry; may be <c>null</c>.</param>
        /// <returns>
        /// A negative value when <paramref name="x"/> sorts before
        /// <paramref name="y"/> (x has the larger count, or only y is null),
        /// zero when they are equivalent, and a positive value otherwise.
        /// </returns>
        public int Compare(VocubWord x, VocubWord y)
        {
            if (x == null && y == null)
            {
                return 0;
            }

            if (y == null)
            {
                return -1;
            }

            if (x == null)
            {
                return 1;
            }

            // Compare the counts rather than subtracting them: casting a long
            // difference to int truncates it, which can report 0 for unequal
            // counts or flip the sign once the difference exceeds the int range.
            return y.Cn.CompareTo(x.Cn);
        }
    }
}
56 changes: 56 additions & 0 deletions Word2Vec.Net/Word2Vec.Net.csproj
@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>Word2Vec.Net</RootNamespace>
<AssemblyName>Word2Vec.Net</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<DocumentationFile>bin\Debug\Word2Vec.Net.XML</DocumentationFile>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="VocubWord.cs" />
<Compile Include="Word2Vec.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Word2VecBuilder.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

0 comments on commit cac21bc

Please sign in to comment.